EVALUACIÓN DE MODELOS

1. Importación y estandarización de la información

load("../data/Frecuencia_De_Accidentes_Semanal.Rda")
load("../data/Dias_Especiales_Semanal.Rda")

Se crean las columnas de accidentes Graves y leves para saber la frecuencia por día

library(reshape)
## Warning: package 'reshape' was built under R version 3.5.3
## 
## Attaching package: 'reshape'
## The following object is masked from 'package:dplyr':
## 
##     rename
## The following objects are masked from 'package:tidyr':
## 
##     expand, smiths
Total_Dataset_Freq_S <- cast(Total_Dataset_Freq_S[,c(1,3,4,5)],ANO+SEMANA~GRAVEDAD)
## Using FREQ as value column.  Use the value argument to cast to override this choice

Se agrega la columna TOTAL_ACCIDENTES

Total_Dataset_Freq_S$TOTAL_ACCIDENTES <- Total_Dataset_Freq_S$ACCIDENTES_GRAVES + Total_Dataset_Freq_S$ACCIDENTES_LEVES
Total_Dataset_Freq_S <- sqldf("SELECT* 
              FROM Total_Dataset_Freq_S FS
              LEFT JOIN Dias_Especiales_Semanal DES 
              ON (FS.ANO=DES.ANO AND FS.SEMANA=DES.SEMANA)")
library(dplyr)
Total_Dataset_Freq_S<-unite_(Total_Dataset_Freq_S, "Ano_Sem", c("ANO..6","SEMANA..8"))
save(Total_Dataset_Freq_S,file="../Modelos/Total_Dataset_Freq_S_semanal.Rda")

2. Partición de los datos para entrenamiento y test

Se ajustarán modelos con la información disponible desde el 01 de enero de 2014 hasta el 31 de diciembre de 2017 y se utilizará el año 2018 para validar el modelo:

Train_S_Dataset <- subset(Total_Dataset_Freq_S, ANO!="2018")
summary(Train_S_Dataset$ANO)
##    Length     Class      Mode 
##       210 character character

Se ajustan otra vez los niveles del factor ANO

Train_S_Dataset$ANO <- factor(Train_S_Dataset$ANO)
summary(Train_S_Dataset$ANO)
## 2014 2015 2016 2017 
##   52   53   53   52
library(sqldf)
Test_S_Dataset <- sqldf("SELECT *  
       FROM Total_Dataset_Freq_S
       WHERE ANO == 2018")
summary(Test_S_Dataset$ANO)
##    Length     Class      Mode 
##        52 character character

Se ajustan otra vez los niveles del factor ANO

Test_S_Dataset$ANO <- factor(Test_S_Dataset$ANO)
summary(Test_S_Dataset$ANO)
## 2018 
##   52

3. Selección de las mejores variables para el modelo

Se utilizará el método forward selection para elegir las mejores variables explicativas del modelo teniendo como criterio aquellas variables que presente mejor R^2 ajustado

Selección de variables para Total accidentes

library (leaps)
## Warning: package 'leaps' was built under R version 3.5.3
regfit.fwd=regsubsets (TOTAL_ACCIDENTES∼Ano_Base+Ano_Base+SEMANA+Feria_Flores_Semana+Semana_Santa_Semana+Feriados_Lunes+Feriados_Otros,Train_S_Dataset, method ="forward", nvmax= 80)
summary (regfit.fwd)
## Subset selection object
## Call: regsubsets.formula(TOTAL_ACCIDENTES ~ Ano_Base + Ano_Base + SEMANA + 
##     Feria_Flores_Semana + Semana_Santa_Semana + Feriados_Lunes + 
##     Feriados_Otros, Train_S_Dataset, method = "forward", nvmax = 80)
## 57 Variables  (and intercept)
##                     Forced in Forced out
## Ano_Base                FALSE      FALSE
## SEMANA02                FALSE      FALSE
## SEMANA03                FALSE      FALSE
## SEMANA04                FALSE      FALSE
## SEMANA05                FALSE      FALSE
## SEMANA06                FALSE      FALSE
## SEMANA07                FALSE      FALSE
## SEMANA08                FALSE      FALSE
## SEMANA09                FALSE      FALSE
## SEMANA10                FALSE      FALSE
## SEMANA11                FALSE      FALSE
## SEMANA12                FALSE      FALSE
## SEMANA13                FALSE      FALSE
## SEMANA14                FALSE      FALSE
## SEMANA15                FALSE      FALSE
## SEMANA16                FALSE      FALSE
## SEMANA17                FALSE      FALSE
## SEMANA18                FALSE      FALSE
## SEMANA19                FALSE      FALSE
## SEMANA20                FALSE      FALSE
## SEMANA21                FALSE      FALSE
## SEMANA22                FALSE      FALSE
## SEMANA23                FALSE      FALSE
## SEMANA24                FALSE      FALSE
## SEMANA25                FALSE      FALSE
## SEMANA26                FALSE      FALSE
## SEMANA27                FALSE      FALSE
## SEMANA28                FALSE      FALSE
## SEMANA29                FALSE      FALSE
## SEMANA30                FALSE      FALSE
## SEMANA31                FALSE      FALSE
## SEMANA32                FALSE      FALSE
## SEMANA33                FALSE      FALSE
## SEMANA34                FALSE      FALSE
## SEMANA35                FALSE      FALSE
## SEMANA36                FALSE      FALSE
## SEMANA37                FALSE      FALSE
## SEMANA38                FALSE      FALSE
## SEMANA39                FALSE      FALSE
## SEMANA40                FALSE      FALSE
## SEMANA41                FALSE      FALSE
## SEMANA42                FALSE      FALSE
## SEMANA43                FALSE      FALSE
## SEMANA44                FALSE      FALSE
## SEMANA45                FALSE      FALSE
## SEMANA46                FALSE      FALSE
## SEMANA47                FALSE      FALSE
## SEMANA48                FALSE      FALSE
## SEMANA49                FALSE      FALSE
## SEMANA50                FALSE      FALSE
## SEMANA51                FALSE      FALSE
## SEMANA52                FALSE      FALSE
## SEMANA53                FALSE      FALSE
## Feria_Flores_Semana     FALSE      FALSE
## Semana_Santa_Semana     FALSE      FALSE
## Feriados_Lunes          FALSE      FALSE
## Feriados_Otros          FALSE      FALSE
## 1 subsets of each size up to 57
## Selection Algorithm: forward
##           Ano_Base SEMANA02 SEMANA03 SEMANA04 SEMANA05 SEMANA06 SEMANA07
## 1  ( 1 )  " "      " "      " "      " "      " "      " "      " "     
## 2  ( 1 )  " "      " "      " "      " "      " "      " "      " "     
## 3  ( 1 )  " "      " "      " "      " "      " "      " "      " "     
## 4  ( 1 )  " "      "*"      " "      " "      " "      " "      " "     
## 5  ( 1 )  " "      "*"      " "      " "      " "      " "      " "     
## 6  ( 1 )  " "      "*"      " "      " "      " "      " "      " "     
## 7  ( 1 )  " "      "*"      " "      " "      " "      " "      " "     
## 8  ( 1 )  " "      "*"      " "      " "      " "      " "      " "     
## 9  ( 1 )  " "      "*"      "*"      " "      " "      " "      " "     
## 10  ( 1 ) " "      "*"      "*"      "*"      " "      " "      " "     
## 11  ( 1 ) "*"      "*"      "*"      "*"      " "      " "      " "     
## 12  ( 1 ) "*"      "*"      "*"      "*"      " "      " "      " "     
## 13  ( 1 ) "*"      "*"      "*"      "*"      " "      " "      " "     
## 14  ( 1 ) "*"      "*"      "*"      "*"      " "      " "      " "     
## 15  ( 1 ) "*"      "*"      "*"      "*"      " "      " "      " "     
## 16  ( 1 ) "*"      "*"      "*"      "*"      " "      " "      " "     
## 17  ( 1 ) "*"      "*"      "*"      "*"      " "      " "      " "     
## 18  ( 1 ) "*"      "*"      "*"      "*"      " "      " "      " "     
## 19  ( 1 ) "*"      "*"      "*"      "*"      " "      " "      " "     
## 20  ( 1 ) "*"      "*"      "*"      "*"      " "      " "      " "     
## 21  ( 1 ) "*"      "*"      "*"      "*"      " "      " "      " "     
## 22  ( 1 ) "*"      "*"      "*"      "*"      " "      " "      " "     
## 23  ( 1 ) "*"      "*"      "*"      "*"      " "      " "      " "     
## 24  ( 1 ) "*"      "*"      "*"      "*"      " "      " "      " "     
## 25  ( 1 ) "*"      "*"      "*"      "*"      " "      " "      " "     
## 26  ( 1 ) "*"      "*"      "*"      "*"      " "      " "      " "     
## 27  ( 1 ) "*"      "*"      "*"      "*"      " "      " "      " "     
## 28  ( 1 ) "*"      "*"      "*"      "*"      " "      " "      " "     
## 29  ( 1 ) "*"      "*"      "*"      "*"      " "      " "      " "     
## 30  ( 1 ) "*"      "*"      "*"      "*"      " "      " "      " "     
## 31  ( 1 ) "*"      "*"      "*"      "*"      " "      " "      "*"     
## 32  ( 1 ) "*"      "*"      "*"      "*"      " "      " "      "*"     
## 33  ( 1 ) "*"      "*"      "*"      "*"      " "      " "      "*"     
## 34  ( 1 ) "*"      "*"      "*"      "*"      " "      " "      "*"     
## 35  ( 1 ) "*"      "*"      "*"      "*"      " "      " "      "*"     
## 36  ( 1 ) "*"      "*"      "*"      "*"      " "      " "      "*"     
## 37  ( 1 ) "*"      "*"      "*"      "*"      " "      "*"      "*"     
## 38  ( 1 ) "*"      "*"      "*"      "*"      " "      "*"      "*"     
## 39  ( 1 ) "*"      "*"      "*"      "*"      " "      "*"      "*"     
## 40  ( 1 ) "*"      "*"      "*"      "*"      " "      "*"      "*"     
## 41  ( 1 ) "*"      "*"      "*"      "*"      " "      "*"      "*"     
## 42  ( 1 ) "*"      "*"      "*"      "*"      " "      "*"      "*"     
## 43  ( 1 ) "*"      "*"      "*"      "*"      " "      "*"      "*"     
## 44  ( 1 ) "*"      "*"      "*"      "*"      " "      "*"      "*"     
## 45  ( 1 ) "*"      "*"      "*"      "*"      " "      "*"      "*"     
## 46  ( 1 ) "*"      "*"      "*"      "*"      " "      "*"      "*"     
## 47  ( 1 ) "*"      "*"      "*"      "*"      " "      "*"      "*"     
## 48  ( 1 ) "*"      "*"      "*"      "*"      " "      "*"      "*"     
## 49  ( 1 ) "*"      "*"      "*"      "*"      " "      "*"      "*"     
## 50  ( 1 ) "*"      "*"      "*"      "*"      " "      "*"      "*"     
## 51  ( 1 ) "*"      "*"      "*"      "*"      " "      "*"      "*"     
## 52  ( 1 ) "*"      "*"      "*"      "*"      " "      "*"      "*"     
## 53  ( 1 ) "*"      "*"      "*"      "*"      "*"      "*"      "*"     
## 54  ( 1 ) "*"      "*"      "*"      "*"      "*"      "*"      "*"     
## 55  ( 1 ) "*"      "*"      "*"      "*"      "*"      "*"      "*"     
## 56  ( 1 ) "*"      "*"      "*"      "*"      "*"      "*"      "*"     
## 57  ( 1 ) "*"      "*"      "*"      "*"      "*"      "*"      "*"     
##           SEMANA08 SEMANA09 SEMANA10 SEMANA11 SEMANA12 SEMANA13 SEMANA14
## 1  ( 1 )  " "      " "      " "      " "      " "      " "      " "     
## 2  ( 1 )  " "      " "      " "      " "      " "      " "      " "     
## 3  ( 1 )  " "      " "      " "      " "      " "      " "      " "     
## 4  ( 1 )  " "      " "      " "      " "      " "      " "      " "     
## 5  ( 1 )  " "      " "      " "      " "      " "      " "      " "     
## 6  ( 1 )  " "      " "      " "      " "      " "      " "      " "     
## 7  ( 1 )  " "      " "      " "      " "      " "      " "      " "     
## 8  ( 1 )  " "      " "      " "      " "      " "      " "      " "     
## 9  ( 1 )  " "      " "      " "      " "      " "      " "      " "     
## 10  ( 1 ) " "      " "      " "      " "      " "      " "      " "     
## 11  ( 1 ) " "      " "      " "      " "      " "      " "      " "     
## 12  ( 1 ) " "      " "      " "      " "      " "      " "      " "     
## 13  ( 1 ) " "      " "      " "      " "      " "      " "      " "     
## 14  ( 1 ) " "      " "      " "      " "      " "      " "      " "     
## 15  ( 1 ) " "      " "      " "      " "      " "      " "      " "     
## 16  ( 1 ) " "      " "      " "      " "      " "      " "      " "     
## 17  ( 1 ) " "      " "      " "      " "      " "      " "      " "     
## 18  ( 1 ) " "      " "      " "      " "      " "      " "      " "     
## 19  ( 1 ) " "      " "      " "      " "      " "      " "      " "     
## 20  ( 1 ) " "      " "      "*"      " "      " "      " "      " "     
## 21  ( 1 ) " "      " "      "*"      " "      " "      " "      " "     
## 22  ( 1 ) " "      " "      "*"      " "      " "      " "      "*"     
## 23  ( 1 ) " "      " "      "*"      " "      " "      " "      "*"     
## 24  ( 1 ) " "      " "      "*"      " "      " "      " "      "*"     
## 25  ( 1 ) " "      " "      "*"      " "      " "      " "      "*"     
## 26  ( 1 ) " "      " "      "*"      "*"      " "      " "      "*"     
## 27  ( 1 ) " "      " "      "*"      "*"      " "      " "      "*"     
## 28  ( 1 ) " "      " "      "*"      "*"      " "      " "      "*"     
## 29  ( 1 ) " "      " "      "*"      "*"      " "      " "      "*"     
## 30  ( 1 ) " "      " "      "*"      "*"      " "      " "      "*"     
## 31  ( 1 ) " "      " "      "*"      "*"      " "      " "      "*"     
## 32  ( 1 ) " "      " "      "*"      "*"      " "      " "      "*"     
## 33  ( 1 ) " "      " "      "*"      "*"      " "      " "      "*"     
## 34  ( 1 ) " "      " "      "*"      "*"      " "      " "      "*"     
## 35  ( 1 ) " "      " "      "*"      "*"      " "      " "      "*"     
## 36  ( 1 ) " "      " "      "*"      "*"      " "      " "      "*"     
## 37  ( 1 ) " "      " "      "*"      "*"      " "      " "      "*"     
## 38  ( 1 ) " "      " "      "*"      "*"      " "      " "      "*"     
## 39  ( 1 ) " "      " "      "*"      "*"      " "      " "      "*"     
## 40  ( 1 ) " "      " "      "*"      "*"      " "      " "      "*"     
## 41  ( 1 ) "*"      " "      "*"      "*"      " "      " "      "*"     
## 42  ( 1 ) "*"      " "      "*"      "*"      " "      " "      "*"     
## 43  ( 1 ) "*"      " "      "*"      "*"      " "      " "      "*"     
## 44  ( 1 ) "*"      "*"      "*"      "*"      " "      " "      "*"     
## 45  ( 1 ) "*"      "*"      "*"      "*"      " "      " "      "*"     
## 46  ( 1 ) "*"      "*"      "*"      "*"      " "      " "      "*"     
## 47  ( 1 ) "*"      "*"      "*"      "*"      "*"      " "      "*"     
## 48  ( 1 ) "*"      "*"      "*"      "*"      "*"      "*"      "*"     
## 49  ( 1 ) "*"      "*"      "*"      "*"      "*"      "*"      "*"     
## 50  ( 1 ) "*"      "*"      "*"      "*"      "*"      "*"      "*"     
## 51  ( 1 ) "*"      "*"      "*"      "*"      "*"      "*"      "*"     
## 52  ( 1 ) "*"      "*"      "*"      "*"      "*"      "*"      "*"     
## 53  ( 1 ) "*"      "*"      "*"      "*"      "*"      "*"      "*"     
## 54  ( 1 ) "*"      "*"      "*"      "*"      "*"      "*"      "*"     
## 55  ( 1 ) "*"      "*"      "*"      "*"      "*"      "*"      "*"     
## 56  ( 1 ) "*"      "*"      "*"      "*"      "*"      "*"      "*"     
## 57  ( 1 ) "*"      "*"      "*"      "*"      "*"      "*"      "*"     
##           SEMANA15 SEMANA16 SEMANA17 SEMANA18 SEMANA19 SEMANA20 SEMANA21
## 1  ( 1 )  " "      " "      " "      " "      " "      " "      " "     
## 2  ( 1 )  " "      " "      " "      " "      " "      " "      " "     
## 3  ( 1 )  " "      " "      " "      " "      " "      " "      " "     
## 4  ( 1 )  " "      " "      " "      " "      " "      " "      " "     
## 5  ( 1 )  " "      " "      " "      " "      " "      " "      " "     
## 6  ( 1 )  " "      " "      " "      " "      " "      " "      " "     
## 7  ( 1 )  " "      " "      " "      " "      " "      " "      " "     
## 8  ( 1 )  " "      " "      " "      " "      " "      " "      " "     
## 9  ( 1 )  " "      " "      " "      " "      " "      " "      " "     
## 10  ( 1 ) " "      " "      " "      " "      " "      " "      " "     
## 11  ( 1 ) " "      " "      " "      " "      " "      " "      " "     
## 12  ( 1 ) " "      " "      " "      " "      " "      " "      " "     
## 13  ( 1 ) " "      " "      " "      " "      " "      " "      " "     
## 14  ( 1 ) " "      " "      " "      "*"      " "      " "      " "     
## 15  ( 1 ) " "      " "      " "      "*"      " "      " "      " "     
## 16  ( 1 ) " "      " "      "*"      "*"      " "      " "      " "     
## 17  ( 1 ) " "      " "      "*"      "*"      " "      " "      " "     
## 18  ( 1 ) " "      " "      "*"      "*"      " "      " "      " "     
## 19  ( 1 ) " "      " "      "*"      "*"      " "      " "      " "     
## 20  ( 1 ) " "      " "      "*"      "*"      " "      " "      " "     
## 21  ( 1 ) " "      " "      "*"      "*"      " "      " "      " "     
## 22  ( 1 ) " "      " "      "*"      "*"      " "      " "      " "     
## 23  ( 1 ) " "      " "      "*"      "*"      " "      " "      " "     
## 24  ( 1 ) " "      " "      "*"      "*"      " "      " "      " "     
## 25  ( 1 ) "*"      " "      "*"      "*"      " "      " "      " "     
## 26  ( 1 ) "*"      " "      "*"      "*"      " "      " "      " "     
## 27  ( 1 ) "*"      " "      "*"      "*"      " "      " "      " "     
## 28  ( 1 ) "*"      " "      "*"      "*"      " "      " "      " "     
## 29  ( 1 ) "*"      " "      "*"      "*"      " "      " "      " "     
## 30  ( 1 ) "*"      " "      "*"      "*"      " "      "*"      " "     
## 31  ( 1 ) "*"      " "      "*"      "*"      " "      "*"      " "     
## 32  ( 1 ) "*"      " "      "*"      "*"      " "      "*"      " "     
## 33  ( 1 ) "*"      " "      "*"      "*"      "*"      "*"      " "     
## 34  ( 1 ) "*"      " "      "*"      "*"      "*"      "*"      " "     
## 35  ( 1 ) "*"      " "      "*"      "*"      "*"      "*"      "*"     
## 36  ( 1 ) "*"      " "      "*"      "*"      "*"      "*"      "*"     
## 37  ( 1 ) "*"      " "      "*"      "*"      "*"      "*"      "*"     
## 38  ( 1 ) "*"      " "      "*"      "*"      "*"      "*"      "*"     
## 39  ( 1 ) "*"      " "      "*"      "*"      "*"      "*"      "*"     
## 40  ( 1 ) "*"      " "      "*"      "*"      "*"      "*"      "*"     
## 41  ( 1 ) "*"      " "      "*"      "*"      "*"      "*"      "*"     
## 42  ( 1 ) "*"      " "      "*"      "*"      "*"      "*"      "*"     
## 43  ( 1 ) "*"      " "      "*"      "*"      "*"      "*"      "*"     
## 44  ( 1 ) "*"      " "      "*"      "*"      "*"      "*"      "*"     
## 45  ( 1 ) "*"      " "      "*"      "*"      "*"      "*"      "*"     
## 46  ( 1 ) "*"      " "      "*"      "*"      "*"      "*"      "*"     
## 47  ( 1 ) "*"      " "      "*"      "*"      "*"      "*"      "*"     
## 48  ( 1 ) "*"      " "      "*"      "*"      "*"      "*"      "*"     
## 49  ( 1 ) "*"      " "      "*"      "*"      "*"      "*"      "*"     
## 50  ( 1 ) "*"      "*"      "*"      "*"      "*"      "*"      "*"     
## 51  ( 1 ) "*"      "*"      "*"      "*"      "*"      "*"      "*"     
## 52  ( 1 ) "*"      "*"      "*"      "*"      "*"      "*"      "*"     
## 53  ( 1 ) "*"      "*"      "*"      "*"      "*"      "*"      "*"     
## 54  ( 1 ) "*"      "*"      "*"      "*"      "*"      "*"      "*"     
## 55  ( 1 ) "*"      "*"      "*"      "*"      "*"      "*"      "*"     
## 56  ( 1 ) "*"      "*"      "*"      "*"      "*"      "*"      "*"     
## 57  ( 1 ) "*"      "*"      "*"      "*"      "*"      "*"      "*"     
##           SEMANA22 SEMANA23 SEMANA24 SEMANA25 SEMANA26 SEMANA27 SEMANA28
## 1  ( 1 )  " "      " "      " "      " "      " "      " "      " "     
## 2  ( 1 )  " "      " "      " "      " "      " "      " "      " "     
## 3  ( 1 )  " "      " "      " "      " "      " "      " "      " "     
## 4  ( 1 )  " "      " "      " "      " "      " "      " "      " "     
## 5  ( 1 )  " "      " "      " "      " "      " "      " "      " "     
## 6  ( 1 )  " "      " "      " "      " "      " "      " "      " "     
## 7  ( 1 )  " "      " "      " "      " "      " "      " "      " "     
## 8  ( 1 )  " "      " "      " "      " "      "*"      " "      " "     
## 9  ( 1 )  " "      " "      " "      " "      "*"      " "      " "     
## 10  ( 1 ) " "      " "      " "      " "      "*"      " "      " "     
## 11  ( 1 ) " "      " "      " "      " "      "*"      " "      " "     
## 12  ( 1 ) " "      " "      " "      " "      "*"      " "      " "     
## 13  ( 1 ) " "      " "      " "      " "      "*"      " "      " "     
## 14  ( 1 ) " "      " "      " "      " "      "*"      " "      " "     
## 15  ( 1 ) " "      " "      " "      " "      "*"      " "      " "     
## 16  ( 1 ) " "      " "      " "      " "      "*"      " "      " "     
## 17  ( 1 ) " "      " "      " "      " "      "*"      " "      " "     
## 18  ( 1 ) " "      " "      " "      " "      "*"      " "      " "     
## 19  ( 1 ) " "      " "      " "      " "      "*"      " "      " "     
## 20  ( 1 ) " "      " "      " "      " "      "*"      " "      " "     
## 21  ( 1 ) " "      " "      " "      " "      "*"      " "      " "     
## 22  ( 1 ) " "      " "      " "      " "      "*"      " "      " "     
## 23  ( 1 ) " "      " "      " "      " "      "*"      " "      " "     
## 24  ( 1 ) " "      " "      " "      " "      "*"      " "      " "     
## 25  ( 1 ) " "      " "      " "      " "      "*"      " "      " "     
## 26  ( 1 ) " "      " "      " "      " "      "*"      " "      " "     
## 27  ( 1 ) " "      " "      " "      " "      "*"      " "      " "     
## 28  ( 1 ) " "      " "      " "      " "      "*"      " "      " "     
## 29  ( 1 ) " "      "*"      " "      " "      "*"      " "      " "     
## 30  ( 1 ) " "      "*"      " "      " "      "*"      " "      " "     
## 31  ( 1 ) " "      "*"      " "      " "      "*"      " "      " "     
## 32  ( 1 ) " "      "*"      " "      " "      "*"      " "      " "     
## 33  ( 1 ) " "      "*"      " "      " "      "*"      " "      " "     
## 34  ( 1 ) " "      "*"      " "      " "      "*"      " "      " "     
## 35  ( 1 ) " "      "*"      " "      " "      "*"      " "      " "     
## 36  ( 1 ) " "      "*"      " "      " "      "*"      " "      " "     
## 37  ( 1 ) " "      "*"      " "      " "      "*"      " "      " "     
## 38  ( 1 ) " "      "*"      " "      " "      "*"      " "      " "     
## 39  ( 1 ) " "      "*"      " "      " "      "*"      " "      " "     
## 40  ( 1 ) " "      "*"      "*"      " "      "*"      " "      " "     
## 41  ( 1 ) " "      "*"      "*"      " "      "*"      " "      " "     
## 42  ( 1 ) " "      "*"      "*"      " "      "*"      " "      " "     
## 43  ( 1 ) " "      "*"      "*"      " "      "*"      " "      "*"     
## 44  ( 1 ) " "      "*"      "*"      " "      "*"      " "      "*"     
## 45  ( 1 ) " "      "*"      "*"      " "      "*"      " "      "*"     
## 46  ( 1 ) " "      "*"      "*"      " "      "*"      " "      "*"     
## 47  ( 1 ) " "      "*"      "*"      " "      "*"      " "      "*"     
## 48  ( 1 ) " "      "*"      "*"      " "      "*"      " "      "*"     
## 49  ( 1 ) " "      "*"      "*"      " "      "*"      " "      "*"     
## 50  ( 1 ) " "      "*"      "*"      " "      "*"      " "      "*"     
## 51  ( 1 ) " "      "*"      "*"      " "      "*"      " "      "*"     
## 52  ( 1 ) " "      "*"      "*"      " "      "*"      " "      "*"     
## 53  ( 1 ) " "      "*"      "*"      " "      "*"      " "      "*"     
## 54  ( 1 ) "*"      "*"      "*"      " "      "*"      " "      "*"     
## 55  ( 1 ) "*"      "*"      "*"      " "      "*"      "*"      "*"     
## 56  ( 1 ) "*"      "*"      "*"      "*"      "*"      "*"      "*"     
## 57  ( 1 ) "*"      "*"      "*"      "*"      "*"      "*"      "*"     
##           SEMANA29 SEMANA30 SEMANA31 SEMANA32 SEMANA33 SEMANA34 SEMANA35
## 1  ( 1 )  " "      " "      " "      " "      " "      " "      " "     
## 2  ( 1 )  " "      " "      " "      " "      " "      " "      " "     
## 3  ( 1 )  " "      " "      " "      " "      " "      " "      " "     
## 4  ( 1 )  " "      " "      " "      " "      " "      " "      " "     
## 5  ( 1 )  " "      " "      " "      " "      " "      " "      " "     
## 6  ( 1 )  " "      " "      " "      " "      " "      " "      " "     
## 7  ( 1 )  " "      " "      " "      " "      " "      " "      " "     
## 8  ( 1 )  " "      " "      " "      " "      " "      " "      " "     
## 9  ( 1 )  " "      " "      " "      " "      " "      " "      " "     
## 10  ( 1 ) " "      " "      " "      " "      " "      " "      " "     
## 11  ( 1 ) " "      " "      " "      " "      " "      " "      " "     
## 12  ( 1 ) " "      " "      " "      " "      " "      " "      " "     
## 13  ( 1 ) " "      " "      " "      " "      " "      " "      " "     
## 14  ( 1 ) " "      " "      " "      " "      " "      " "      " "     
## 15  ( 1 ) "*"      " "      " "      " "      " "      " "      " "     
## 16  ( 1 ) "*"      " "      " "      " "      " "      " "      " "     
## 17  ( 1 ) "*"      " "      " "      " "      " "      " "      " "     
## 18  ( 1 ) "*"      " "      " "      " "      " "      " "      " "     
## 19  ( 1 ) "*"      " "      " "      " "      " "      " "      " "     
## 20  ( 1 ) "*"      " "      " "      " "      " "      " "      " "     
## 21  ( 1 ) "*"      " "      " "      " "      " "      " "      " "     
## 22  ( 1 ) "*"      " "      " "      " "      " "      " "      " "     
## 23  ( 1 ) "*"      " "      " "      " "      " "      " "      " "     
## 24  ( 1 ) "*"      " "      " "      " "      "*"      " "      " "     
## 25  ( 1 ) "*"      " "      " "      " "      "*"      " "      " "     
## 26  ( 1 ) "*"      " "      " "      " "      "*"      " "      " "     
## 27  ( 1 ) "*"      "*"      " "      " "      "*"      " "      " "     
## 28  ( 1 ) "*"      "*"      " "      " "      "*"      " "      "*"     
## 29  ( 1 ) "*"      "*"      " "      " "      "*"      " "      "*"     
## 30  ( 1 ) "*"      "*"      " "      " "      "*"      " "      "*"     
## 31  ( 1 ) "*"      "*"      " "      " "      "*"      " "      "*"     
## 32  ( 1 ) "*"      "*"      "*"      " "      "*"      " "      "*"     
## 33  ( 1 ) "*"      "*"      "*"      " "      "*"      " "      "*"     
## 34  ( 1 ) "*"      "*"      "*"      "*"      "*"      " "      "*"     
## 35  ( 1 ) "*"      "*"      "*"      "*"      "*"      " "      "*"     
## 36  ( 1 ) "*"      "*"      "*"      "*"      "*"      " "      "*"     
## 37  ( 1 ) "*"      "*"      "*"      "*"      "*"      " "      "*"     
## 38  ( 1 ) "*"      "*"      "*"      "*"      "*"      " "      "*"     
## 39  ( 1 ) "*"      "*"      "*"      "*"      "*"      " "      "*"     
## 40  ( 1 ) "*"      "*"      "*"      "*"      "*"      " "      "*"     
## 41  ( 1 ) "*"      "*"      "*"      "*"      "*"      " "      "*"     
## 42  ( 1 ) "*"      "*"      "*"      "*"      "*"      " "      "*"     
## 43  ( 1 ) "*"      "*"      "*"      "*"      "*"      " "      "*"     
## 44  ( 1 ) "*"      "*"      "*"      "*"      "*"      " "      "*"     
## 45  ( 1 ) "*"      "*"      "*"      "*"      "*"      " "      "*"     
## 46  ( 1 ) "*"      "*"      "*"      "*"      "*"      " "      "*"     
## 47  ( 1 ) "*"      "*"      "*"      "*"      "*"      " "      "*"     
## 48  ( 1 ) "*"      "*"      "*"      "*"      "*"      " "      "*"     
## 49  ( 1 ) "*"      "*"      "*"      "*"      "*"      " "      "*"     
## 50  ( 1 ) "*"      "*"      "*"      "*"      "*"      " "      "*"     
## 51  ( 1 ) "*"      "*"      "*"      "*"      "*"      " "      "*"     
## 52  ( 1 ) "*"      "*"      "*"      "*"      "*"      "*"      "*"     
## 53  ( 1 ) "*"      "*"      "*"      "*"      "*"      "*"      "*"     
## 54  ( 1 ) "*"      "*"      "*"      "*"      "*"      "*"      "*"     
## 55  ( 1 ) "*"      "*"      "*"      "*"      "*"      "*"      "*"     
## 56  ( 1 ) "*"      "*"      "*"      "*"      "*"      "*"      "*"     
## 57  ( 1 ) "*"      "*"      "*"      "*"      "*"      "*"      "*"     
##           SEMANA36 SEMANA37 SEMANA38 SEMANA39 SEMANA40 SEMANA41 SEMANA42
## 1  ( 1 )  " "      " "      " "      " "      " "      " "      " "     
## 2  ( 1 )  " "      " "      " "      " "      " "      " "      " "     
## 3  ( 1 )  " "      " "      " "      " "      " "      " "      " "     
## 4  ( 1 )  " "      " "      " "      " "      " "      " "      " "     
## 5  ( 1 )  " "      " "      " "      " "      " "      " "      " "     
## 6  ( 1 )  " "      " "      " "      " "      " "      " "      " "     
## 7  ( 1 )  " "      " "      " "      " "      " "      " "      " "     
## 8  ( 1 )  " "      " "      " "      " "      " "      " "      " "     
## 9  ( 1 )  " "      " "      " "      " "      " "      " "      " "     
## 10  ( 1 ) " "      " "      " "      " "      " "      " "      " "     
## 11  ( 1 ) " "      " "      " "      " "      " "      " "      " "     
## 12  ( 1 ) " "      " "      " "      " "      " "      "*"      " "     
## 13  ( 1 ) " "      " "      " "      " "      " "      "*"      " "     
## 14  ( 1 ) " "      " "      " "      " "      " "      "*"      " "     
## 15  ( 1 ) " "      " "      " "      " "      " "      "*"      " "     
## 16  ( 1 ) " "      " "      " "      " "      " "      "*"      " "     
## 17  ( 1 ) " "      "*"      " "      " "      " "      "*"      " "     
## 18  ( 1 ) " "      "*"      "*"      " "      " "      "*"      " "     
## 19  ( 1 ) " "      "*"      "*"      " "      "*"      "*"      " "     
## 20  ( 1 ) " "      "*"      "*"      " "      "*"      "*"      " "     
## 21  ( 1 ) " "      "*"      "*"      " "      "*"      "*"      " "     
## 22  ( 1 ) " "      "*"      "*"      " "      "*"      "*"      " "     
## 23  ( 1 ) " "      "*"      "*"      " "      "*"      "*"      " "     
## 24  ( 1 ) " "      "*"      "*"      " "      "*"      "*"      " "     
## 25  ( 1 ) " "      "*"      "*"      " "      "*"      "*"      " "     
## 26  ( 1 ) " "      "*"      "*"      " "      "*"      "*"      " "     
## 27  ( 1 ) " "      "*"      "*"      " "      "*"      "*"      " "     
## 28  ( 1 ) " "      "*"      "*"      " "      "*"      "*"      " "     
## 29  ( 1 ) " "      "*"      "*"      " "      "*"      "*"      " "     
## 30  ( 1 ) " "      "*"      "*"      " "      "*"      "*"      " "     
## 31  ( 1 ) " "      "*"      "*"      " "      "*"      "*"      " "     
## 32  ( 1 ) " "      "*"      "*"      " "      "*"      "*"      " "     
## 33  ( 1 ) " "      "*"      "*"      " "      "*"      "*"      " "     
## 34  ( 1 ) " "      "*"      "*"      " "      "*"      "*"      " "     
## 35  ( 1 ) " "      "*"      "*"      " "      "*"      "*"      " "     
## 36  ( 1 ) " "      "*"      "*"      " "      "*"      "*"      " "     
## 37  ( 1 ) " "      "*"      "*"      " "      "*"      "*"      " "     
## 38  ( 1 ) "*"      "*"      "*"      " "      "*"      "*"      " "     
## 39  ( 1 ) "*"      "*"      "*"      " "      "*"      "*"      " "     
## 40  ( 1 ) "*"      "*"      "*"      " "      "*"      "*"      " "     
## 41  ( 1 ) "*"      "*"      "*"      " "      "*"      "*"      " "     
## 42  ( 1 ) "*"      "*"      "*"      " "      "*"      "*"      " "     
## 43  ( 1 ) "*"      "*"      "*"      " "      "*"      "*"      " "     
## 44  ( 1 ) "*"      "*"      "*"      " "      "*"      "*"      " "     
## 45  ( 1 ) "*"      "*"      "*"      " "      "*"      "*"      " "     
## 46  ( 1 ) "*"      "*"      "*"      "*"      "*"      "*"      " "     
## 47  ( 1 ) "*"      "*"      "*"      "*"      "*"      "*"      " "     
## 48  ( 1 ) "*"      "*"      "*"      "*"      "*"      "*"      " "     
## 49  ( 1 ) "*"      "*"      "*"      "*"      "*"      "*"      " "     
## 50  ( 1 ) "*"      "*"      "*"      "*"      "*"      "*"      " "     
## 51  ( 1 ) "*"      "*"      "*"      "*"      "*"      "*"      "*"     
## 52  ( 1 ) "*"      "*"      "*"      "*"      "*"      "*"      "*"     
## 53  ( 1 ) "*"      "*"      "*"      "*"      "*"      "*"      "*"     
## 54  ( 1 ) "*"      "*"      "*"      "*"      "*"      "*"      "*"     
## 55  ( 1 ) "*"      "*"      "*"      "*"      "*"      "*"      "*"     
## 56  ( 1 ) "*"      "*"      "*"      "*"      "*"      "*"      "*"     
## 57  ( 1 ) "*"      "*"      "*"      "*"      "*"      "*"      "*"     
##           SEMANA43 SEMANA44 SEMANA45 SEMANA46 SEMANA47 SEMANA48 SEMANA49
## 1  ( 1 )  " "      " "      " "      " "      " "      " "      " "     
## 2  ( 1 )  " "      " "      " "      " "      " "      " "      " "     
## 3  ( 1 )  " "      " "      " "      " "      " "      " "      " "     
## 4  ( 1 )  " "      " "      " "      " "      " "      " "      " "     
## 5  ( 1 )  " "      " "      " "      " "      " "      " "      " "     
## 6  ( 1 )  " "      " "      " "      " "      " "      " "      " "     
## 7  ( 1 )  " "      " "      " "      " "      " "      " "      " "     
## 8  ( 1 )  " "      " "      " "      " "      " "      " "      " "     
## 9  ( 1 )  " "      " "      " "      " "      " "      " "      " "     
## 10  ( 1 ) " "      " "      " "      " "      " "      " "      " "     
## 11  ( 1 ) " "      " "      " "      " "      " "      " "      " "     
## 12  ( 1 ) " "      " "      " "      " "      " "      " "      " "     
## 13  ( 1 ) " "      " "      " "      " "      " "      " "      " "     
## 14  ( 1 ) " "      " "      " "      " "      " "      " "      " "     
## 15  ( 1 ) " "      " "      " "      " "      " "      " "      " "     
## 16  ( 1 ) " "      " "      " "      " "      " "      " "      " "     
## 17  ( 1 ) " "      " "      " "      " "      " "      " "      " "     
## 18  ( 1 ) " "      " "      " "      " "      " "      " "      " "     
## 19  ( 1 ) " "      " "      " "      " "      " "      " "      " "     
## 20  ( 1 ) " "      " "      " "      " "      " "      " "      " "     
## 21  ( 1 ) " "      " "      " "      " "      " "      " "      "*"     
## 22  ( 1 ) " "      " "      " "      " "      " "      " "      "*"     
## 23  ( 1 ) " "      " "      " "      " "      " "      " "      "*"     
## 24  ( 1 ) " "      " "      " "      " "      " "      " "      "*"     
## 25  ( 1 ) " "      " "      " "      " "      " "      " "      "*"     
## 26  ( 1 ) " "      " "      " "      " "      " "      " "      "*"     
## 27  ( 1 ) " "      " "      " "      " "      " "      " "      "*"     
## 28  ( 1 ) " "      " "      " "      " "      " "      " "      "*"     
## 29  ( 1 ) " "      " "      " "      " "      " "      " "      "*"     
## 30  ( 1 ) " "      " "      " "      " "      " "      " "      "*"     
## 31  ( 1 ) " "      " "      " "      " "      " "      " "      "*"     
## 32  ( 1 ) " "      " "      " "      " "      " "      " "      "*"     
## 33  ( 1 ) " "      " "      " "      " "      " "      " "      "*"     
## 34  ( 1 ) " "      " "      " "      " "      " "      " "      "*"     
## 35  ( 1 ) " "      " "      " "      " "      " "      " "      "*"     
## 36  ( 1 ) "*"      " "      " "      " "      " "      " "      "*"     
## 37  ( 1 ) "*"      " "      " "      " "      " "      " "      "*"     
## 38  ( 1 ) "*"      " "      " "      " "      " "      " "      "*"     
## 39  ( 1 ) "*"      " "      " "      "*"      " "      " "      "*"     
## 40  ( 1 ) "*"      " "      " "      "*"      " "      " "      "*"     
## 41  ( 1 ) "*"      " "      " "      "*"      " "      " "      "*"     
## 42  ( 1 ) "*"      "*"      " "      "*"      " "      " "      "*"     
## 43  ( 1 ) "*"      "*"      " "      "*"      " "      " "      "*"     
## 44  ( 1 ) "*"      "*"      " "      "*"      " "      " "      "*"     
## 45  ( 1 ) "*"      "*"      " "      "*"      " "      "*"      "*"     
## 46  ( 1 ) "*"      "*"      " "      "*"      " "      "*"      "*"     
## 47  ( 1 ) "*"      "*"      " "      "*"      " "      "*"      "*"     
## 48  ( 1 ) "*"      "*"      " "      "*"      " "      "*"      "*"     
## 49  ( 1 ) "*"      "*"      " "      "*"      "*"      "*"      "*"     
## 50  ( 1 ) "*"      "*"      " "      "*"      "*"      "*"      "*"     
## 51  ( 1 ) "*"      "*"      " "      "*"      "*"      "*"      "*"     
## 52  ( 1 ) "*"      "*"      " "      "*"      "*"      "*"      "*"     
## 53  ( 1 ) "*"      "*"      " "      "*"      "*"      "*"      "*"     
## 54  ( 1 ) "*"      "*"      " "      "*"      "*"      "*"      "*"     
## 55  ( 1 ) "*"      "*"      " "      "*"      "*"      "*"      "*"     
## 56  ( 1 ) "*"      "*"      " "      "*"      "*"      "*"      "*"     
## 57  ( 1 ) "*"      "*"      "*"      "*"      "*"      "*"      "*"     
##           SEMANA50 SEMANA51 SEMANA52 SEMANA53 Feria_Flores_Semana
## 1  ( 1 )  " "      " "      " "      "*"      " "                
## 2  ( 1 )  " "      " "      " "      "*"      " "                
## 3  ( 1 )  " "      " "      " "      "*"      " "                
## 4  ( 1 )  " "      " "      " "      "*"      " "                
## 5  ( 1 )  " "      " "      "*"      "*"      " "                
## 6  ( 1 )  " "      " "      "*"      "*"      "*"                
## 7  ( 1 )  " "      " "      "*"      "*"      "*"                
## 8  ( 1 )  " "      " "      "*"      "*"      "*"                
## 9  ( 1 )  " "      " "      "*"      "*"      "*"                
## 10  ( 1 ) " "      " "      "*"      "*"      "*"                
## 11  ( 1 ) " "      " "      "*"      "*"      "*"                
## 12  ( 1 ) " "      " "      "*"      "*"      "*"                
## 13  ( 1 ) " "      "*"      "*"      "*"      "*"                
## 14  ( 1 ) " "      "*"      "*"      "*"      "*"                
## 15  ( 1 ) " "      "*"      "*"      "*"      "*"                
## 16  ( 1 ) " "      "*"      "*"      "*"      "*"                
## 17  ( 1 ) " "      "*"      "*"      "*"      "*"                
## 18  ( 1 ) " "      "*"      "*"      "*"      "*"                
## 19  ( 1 ) " "      "*"      "*"      "*"      "*"                
## 20  ( 1 ) " "      "*"      "*"      "*"      "*"                
## 21  ( 1 ) " "      "*"      "*"      "*"      "*"                
## 22  ( 1 ) " "      "*"      "*"      "*"      "*"                
## 23  ( 1 ) "*"      "*"      "*"      "*"      "*"                
## 24  ( 1 ) "*"      "*"      "*"      "*"      "*"                
## 25  ( 1 ) "*"      "*"      "*"      "*"      "*"                
## 26  ( 1 ) "*"      "*"      "*"      "*"      "*"                
## 27  ( 1 ) "*"      "*"      "*"      "*"      "*"                
## 28  ( 1 ) "*"      "*"      "*"      "*"      "*"                
## 29  ( 1 ) "*"      "*"      "*"      "*"      "*"                
## 30  ( 1 ) "*"      "*"      "*"      "*"      "*"                
## 31  ( 1 ) "*"      "*"      "*"      "*"      "*"                
## 32  ( 1 ) "*"      "*"      "*"      "*"      "*"                
## 33  ( 1 ) "*"      "*"      "*"      "*"      "*"                
## 34  ( 1 ) "*"      "*"      "*"      "*"      "*"                
## 35  ( 1 ) "*"      "*"      "*"      "*"      "*"                
## 36  ( 1 ) "*"      "*"      "*"      "*"      "*"                
## 37  ( 1 ) "*"      "*"      "*"      "*"      "*"                
## 38  ( 1 ) "*"      "*"      "*"      "*"      "*"                
## 39  ( 1 ) "*"      "*"      "*"      "*"      "*"                
## 40  ( 1 ) "*"      "*"      "*"      "*"      "*"                
## 41  ( 1 ) "*"      "*"      "*"      "*"      "*"                
## 42  ( 1 ) "*"      "*"      "*"      "*"      "*"                
## 43  ( 1 ) "*"      "*"      "*"      "*"      "*"                
## 44  ( 1 ) "*"      "*"      "*"      "*"      "*"                
## 45  ( 1 ) "*"      "*"      "*"      "*"      "*"                
## 46  ( 1 ) "*"      "*"      "*"      "*"      "*"                
## 47  ( 1 ) "*"      "*"      "*"      "*"      "*"                
## 48  ( 1 ) "*"      "*"      "*"      "*"      "*"                
## 49  ( 1 ) "*"      "*"      "*"      "*"      "*"                
## 50  ( 1 ) "*"      "*"      "*"      "*"      "*"                
## 51  ( 1 ) "*"      "*"      "*"      "*"      "*"                
## 52  ( 1 ) "*"      "*"      "*"      "*"      "*"                
## 53  ( 1 ) "*"      "*"      "*"      "*"      "*"                
## 54  ( 1 ) "*"      "*"      "*"      "*"      "*"                
## 55  ( 1 ) "*"      "*"      "*"      "*"      "*"                
## 56  ( 1 ) "*"      "*"      "*"      "*"      "*"                
## 57  ( 1 ) "*"      "*"      "*"      "*"      "*"                
##           Semana_Santa_Semana Feriados_Lunes Feriados_Otros
## 1  ( 1 )  " "                 " "            " "           
## 2  ( 1 )  "*"                 " "            " "           
## 3  ( 1 )  "*"                 "*"            " "           
## 4  ( 1 )  "*"                 "*"            " "           
## 5  ( 1 )  "*"                 "*"            " "           
## 6  ( 1 )  "*"                 "*"            " "           
## 7  ( 1 )  "*"                 "*"            "*"           
## 8  ( 1 )  "*"                 "*"            "*"           
## 9  ( 1 )  "*"                 "*"            "*"           
## 10  ( 1 ) "*"                 "*"            "*"           
## 11  ( 1 ) "*"                 "*"            "*"           
## 12  ( 1 ) "*"                 "*"            "*"           
## 13  ( 1 ) "*"                 "*"            "*"           
## 14  ( 1 ) "*"                 "*"            "*"           
## 15  ( 1 ) "*"                 "*"            "*"           
## 16  ( 1 ) "*"                 "*"            "*"           
## 17  ( 1 ) "*"                 "*"            "*"           
## 18  ( 1 ) "*"                 "*"            "*"           
## 19  ( 1 ) "*"                 "*"            "*"           
## 20  ( 1 ) "*"                 "*"            "*"           
## 21  ( 1 ) "*"                 "*"            "*"           
## 22  ( 1 ) "*"                 "*"            "*"           
## 23  ( 1 ) "*"                 "*"            "*"           
## 24  ( 1 ) "*"                 "*"            "*"           
## 25  ( 1 ) "*"                 "*"            "*"           
## 26  ( 1 ) "*"                 "*"            "*"           
## 27  ( 1 ) "*"                 "*"            "*"           
## 28  ( 1 ) "*"                 "*"            "*"           
## 29  ( 1 ) "*"                 "*"            "*"           
## 30  ( 1 ) "*"                 "*"            "*"           
## 31  ( 1 ) "*"                 "*"            "*"           
## 32  ( 1 ) "*"                 "*"            "*"           
## 33  ( 1 ) "*"                 "*"            "*"           
## 34  ( 1 ) "*"                 "*"            "*"           
## 35  ( 1 ) "*"                 "*"            "*"           
## 36  ( 1 ) "*"                 "*"            "*"           
## 37  ( 1 ) "*"                 "*"            "*"           
## 38  ( 1 ) "*"                 "*"            "*"           
## 39  ( 1 ) "*"                 "*"            "*"           
## 40  ( 1 ) "*"                 "*"            "*"           
## 41  ( 1 ) "*"                 "*"            "*"           
## 42  ( 1 ) "*"                 "*"            "*"           
## 43  ( 1 ) "*"                 "*"            "*"           
## 44  ( 1 ) "*"                 "*"            "*"           
## 45  ( 1 ) "*"                 "*"            "*"           
## 46  ( 1 ) "*"                 "*"            "*"           
## 47  ( 1 ) "*"                 "*"            "*"           
## 48  ( 1 ) "*"                 "*"            "*"           
## 49  ( 1 ) "*"                 "*"            "*"           
## 50  ( 1 ) "*"                 "*"            "*"           
## 51  ( 1 ) "*"                 "*"            "*"           
## 52  ( 1 ) "*"                 "*"            "*"           
## 53  ( 1 ) "*"                 "*"            "*"           
## 54  ( 1 ) "*"                 "*"            "*"           
## 55  ( 1 ) "*"                 "*"            "*"           
## 56  ( 1 ) "*"                 "*"            "*"           
## 57  ( 1 ) "*"                 "*"            "*"
reg.summary =summary(regfit.fwd)
names(reg.summary)
## [1] "which"  "rsq"    "rss"    "adjr2"  "cp"     "bic"    "outmat" "obj"
reg.summary$rsq
##  [1] 0.2564840 0.3589831 0.4342450 0.4836158 0.5143629 0.5330118 0.5619310
##  [8] 0.5750247 0.5839300 0.5927802 0.5997164 0.6064533 0.6118691 0.6161374
## [15] 0.6209274 0.6259955 0.6303852 0.6350712 0.6391964 0.6434377 0.6466886
## [22] 0.6493843 0.6520138 0.6545914 0.6569814 0.6592532 0.6615622 0.6640333
## [29] 0.6666124 0.6690087 0.6714405 0.6740170 0.6766598 0.6791777 0.6810979
## [36] 0.6824556 0.6838543 0.6854185 0.6869902 0.6886558 0.6903860 0.6921423
## [43] 0.6940457 0.6961530 0.6985117 0.7012379 0.7042655 0.7077893 0.7113063
## [50] 0.7149150 0.7190467 0.7235920 0.7297406 0.7381385 0.7470696 0.7633664
## [57] 0.8113550

Selección de variables con el mejor R^2 ajustado

max_adjr<-which.max (reg.summary$adjr2)
max_adjr
## [1] 57
par(mfrow =c(2,2))
plot(reg.summary$rss ,xlab=" Número de Variables ",ylab=" RSS",
type="l")
plot(reg.summary$adjr2 ,xlab =" Número de Variables ",
ylab=" Adjusted RSq",type="l")

points (max_adjr, reg.summary$adjr2[max_adjr], col ="red",cex =2, pch =20)

plot(regfit.fwd ,scale ="adjr2")

Variables seleccionadas

coef(regfit.fwd ,max_adjr)
##         (Intercept)            Ano_Base            SEMANA02 
##           536.60433            16.93497           108.54653 
##            SEMANA03            SEMANA04            SEMANA05 
##           220.46763           224.92818           257.17818 
##            SEMANA06            SEMANA07            SEMANA08 
##           291.17818           308.67818           287.17818 
##            SEMANA09            SEMANA10            SEMANA11 
##           281.92818           334.92818           314.67818 
##            SEMANA12            SEMANA13            SEMANA14 
##           290.06301           286.50708           325.48411 
##            SEMANA15            SEMANA16            SEMANA17 
##           317.98411           266.23411           332.21409 
##            SEMANA18            SEMANA19            SEMANA20 
##           322.03945           308.21763           310.17818 
##            SEMANA21            SEMANA22            SEMANA23 
##           299.96763           265.50708           313.00708 
##            SEMANA24            SEMANA25            SEMANA26 
##           290.46763           245.50708           201.00708 
##            SEMANA27            SEMANA28            SEMANA29 
##           263.33598           283.42818           311.28591 
##            SEMANA30            SEMANA31            SEMANA32 
##           313.46763           336.17995           307.25531 
##            SEMANA33            SEMANA34            SEMANA35 
##           318.71763           275.79653           313.42818 
##            SEMANA36            SEMANA37            SEMANA38 
##           290.92818           340.67818           340.67818 
##            SEMANA39            SEMANA40            SEMANA41 
##           278.42818           335.92818           235.67818 
##            SEMANA42            SEMANA43            SEMANA44 
##           287.33598           292.42818           284.92818 
##            SEMANA45            SEMANA46            SEMANA47 
##           246.58598           293.00708           282.00708 
##            SEMANA48            SEMANA49            SEMANA50 
##           280.17818           299.00000           304.50354 
##            SEMANA51            SEMANA52            SEMANA53 
##           342.46409           187.82536          -241.50000 
## Feria_Flores_Semana Semana_Santa_Semana      Feriados_Lunes 
##            98.06828          -167.93643           -46.15780 
##      Feriados_Otros 
##           -44.14364

4. Modelamiento

set.seed(123) # fija la semilla del generador de parámetros para que sea reproducible

Se realiza el modelo de regresión lineal con las variables seleccionadas y se revisa el p-valor de cada una para seleccionar las variables definitivas del modelo

library(caret)
## Warning: package 'caret' was built under R version 3.5.3
## Loading required package: lattice
## Warning: package 'lattice' was built under R version 3.5.3
## 
## Attaching package: 'caret'
## The following object is masked from 'package:purrr':
## 
##     lift
trcntrl = trainControl(method="cv", number=10)
caret_lm_fit_s = caret::train(TOTAL_ACCIDENTES∼Ano_Base+Ano_Base+SEMANA+Feria_Flores_Semana+Semana_Santa_Semana+Feriados_Lunes+Feriados_Otros, data=Train_S_Dataset,
                      method = "lm", trControl = trcntrl,
                      preProcess=c("center", "scale"),
                      tuneLength = 10)
summary(caret_lm_fit_s)
## 
## Call:
## lm(formula = .outcome ~ ., data = dat)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -228.461  -26.757    3.867   27.361  191.539 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          805.133      3.572 225.417  < 2e-16 ***
## Ano_Base               8.488      3.583   2.369 0.019107 *  
## SEMANA02              14.873      5.317   2.797 0.005817 ** 
## SEMANA03              30.208      5.185   5.827 3.27e-08 ***
## SEMANA04              30.819      5.172   5.959 1.70e-08 ***
## SEMANA05              35.238      5.172   6.813 2.09e-10 ***
## SEMANA06              39.897      5.172   7.714 1.50e-12 ***
## SEMANA07              42.295      5.172   8.177 1.07e-13 ***
## SEMANA08              39.349      5.172   7.608 2.71e-12 ***
## SEMANA09              38.630      5.172   7.469 5.89e-12 ***
## SEMANA10              45.892      5.172   8.873 1.83e-15 ***
## SEMANA11              43.117      5.172   8.336 4.25e-14 ***
## SEMANA12              39.744      5.322   7.468 5.92e-12 ***
## SEMANA13              39.257      5.233   7.502 4.91e-12 ***
## SEMANA14              44.598      5.274   8.456 2.12e-14 ***
## SEMANA15              43.570      5.274   8.261 6.56e-14 ***
## SEMANA16              36.479      5.274   6.917 1.20e-10 ***
## SEMANA17              45.520      5.055   9.006 8.34e-16 ***
## SEMANA18              44.126      5.034   8.766 3.44e-15 ***
## SEMANA19              42.232      5.185   8.146 1.28e-13 ***
## SEMANA20              42.500      5.172   8.217 8.47e-14 ***
## SEMANA21              41.101      5.185   7.928 4.46e-13 ***
## SEMANA22              36.380      5.233   6.952 9.96e-11 ***
## SEMANA23              42.888      5.233   8.196 9.60e-14 ***
## SEMANA24              39.800      5.185   7.677 1.85e-12 ***
## SEMANA25              33.639      5.233   6.428 1.58e-09 ***
## SEMANA26              27.542      5.233   5.263 4.75e-07 ***
## SEMANA27              36.082      5.434   6.641 5.21e-10 ***
## SEMANA28              38.835      5.172   7.509 4.72e-12 ***
## SEMANA29              42.652      5.055   8.438 2.35e-14 ***
## SEMANA30              42.951      5.185   8.284 5.75e-14 ***
## SEMANA31              46.063      5.827   7.906 5.05e-13 ***
## SEMANA32              42.100      5.718   7.363 1.06e-11 ***
## SEMANA33              43.670      5.185   8.423 2.57e-14 ***
## SEMANA34              37.789      5.317   7.108 4.29e-11 ***
## SEMANA35              42.946      5.172   8.303 5.15e-14 ***
## SEMANA36              39.863      5.172   7.707 1.55e-12 ***
## SEMANA37              46.679      5.172   9.025 7.42e-16 ***
## SEMANA38              46.679      5.172   9.025 7.42e-16 ***
## SEMANA39              38.150      5.172   7.376 9.85e-12 ***
## SEMANA40              46.029      5.172   8.899 1.57e-15 ***
## SEMANA41              32.292      5.172   6.244 4.07e-09 ***
## SEMANA42              39.370      5.434   7.246 2.02e-11 ***
## SEMANA43              40.068      5.172   7.747 1.24e-12 ***
## SEMANA44              39.041      5.172   7.548 3.78e-12 ***
## SEMANA45              33.787      5.434   6.218 4.63e-09 ***
## SEMANA46              40.148      5.233   7.672 1.89e-12 ***
## SEMANA47              38.640      5.233   7.384 9.43e-12 ***
## SEMANA48              38.390      5.172   7.422 7.62e-12 ***
## SEMANA49              40.969      5.015   8.169 1.12e-13 ***
## SEMANA50              41.723      5.070   8.229 7.92e-14 ***
## SEMANA51              46.924      5.055   9.283  < 2e-16 ***
## SEMANA52              25.736      5.076   5.070 1.14e-06 ***
## SEMANA53             -23.512      4.364  -5.388 2.67e-07 ***
## Feria_Flores_Semana   13.437      5.372   2.502 0.013424 *  
## Semana_Santa_Semana  -23.010      6.530  -3.524 0.000562 ***
## Feriados_Lunes       -18.830      5.163  -3.647 0.000364 ***
## Feriados_Otros       -17.145      7.176  -2.389 0.018113 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 51.76 on 152 degrees of freedom
## Multiple R-squared:  0.8114, Adjusted R-squared:  0.7406 
## F-statistic: 11.47 on 57 and 152 DF,  p-value: < 2.2e-16

1. REGRESION LINEAL

Total Accidentes

head(Train_S_Dataset)
##    ANO SEMANA ACCIDENTES_GRAVES ACCIDENTES_LEVES TOTAL_ACCIDENTES Ano_Sem
## 1 2014     01               399              285              684 2014_01
## 2 2014     02               318              254              572 2014_02
## 3 2014     03               385              326              711 2014_03
## 4 2014     04               377              350              727 2014_04
## 5 2014     05               420              371              791 2014_05
## 6 2014     06               432              369              801 2014_06
##   Ano_Base Feria_Flores_Semana Semana_Santa_Semana Feriados_Lunes
## 1        0                   0                   0              0
## 2        0                   0                   0              1
## 3        0                   0                   0              0
## 4        0                   0                   0              0
## 5        0                   0                   0              0
## 6        0                   0                   0              0
##   Feriados_Otros
## 1              1
## 2              0
## 3              0
## 4              0
## 5              0
## 6              0
library(caret)
trcntrl = trainControl(method="cv", number=10)
caret_lm_fit_s = caret::train(TOTAL_ACCIDENTES∼Ano_Base+Ano_Base+SEMANA+Feria_Flores_Semana+Semana_Santa_Semana+Feriados_Lunes+Feriados_Otros, data=Train_S_Dataset,
                      method = "lm", trControl = trcntrl,
                      preProcess=c("center", "scale"),
                      tuneLength = 10)
summary(caret_lm_fit_s)
## 
## Call:
## lm(formula = .outcome ~ ., data = dat)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -228.461  -26.757    3.867   27.361  191.539 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          805.133      3.572 225.417  < 2e-16 ***
## Ano_Base               8.488      3.583   2.369 0.019107 *  
## SEMANA02              14.873      5.317   2.797 0.005817 ** 
## SEMANA03              30.208      5.185   5.827 3.27e-08 ***
## SEMANA04              30.819      5.172   5.959 1.70e-08 ***
## SEMANA05              35.238      5.172   6.813 2.09e-10 ***
## SEMANA06              39.897      5.172   7.714 1.50e-12 ***
## SEMANA07              42.295      5.172   8.177 1.07e-13 ***
## SEMANA08              39.349      5.172   7.608 2.71e-12 ***
## SEMANA09              38.630      5.172   7.469 5.89e-12 ***
## SEMANA10              45.892      5.172   8.873 1.83e-15 ***
## SEMANA11              43.117      5.172   8.336 4.25e-14 ***
## SEMANA12              39.744      5.322   7.468 5.92e-12 ***
## SEMANA13              39.257      5.233   7.502 4.91e-12 ***
## SEMANA14              44.598      5.274   8.456 2.12e-14 ***
## SEMANA15              43.570      5.274   8.261 6.56e-14 ***
## SEMANA16              36.479      5.274   6.917 1.20e-10 ***
## SEMANA17              45.520      5.055   9.006 8.34e-16 ***
## SEMANA18              44.126      5.034   8.766 3.44e-15 ***
## SEMANA19              42.232      5.185   8.146 1.28e-13 ***
## SEMANA20              42.500      5.172   8.217 8.47e-14 ***
## SEMANA21              41.101      5.185   7.928 4.46e-13 ***
## SEMANA22              36.380      5.233   6.952 9.96e-11 ***
## SEMANA23              42.888      5.233   8.196 9.60e-14 ***
## SEMANA24              39.800      5.185   7.677 1.85e-12 ***
## SEMANA25              33.639      5.233   6.428 1.58e-09 ***
## SEMANA26              27.542      5.233   5.263 4.75e-07 ***
## SEMANA27              36.082      5.434   6.641 5.21e-10 ***
## SEMANA28              38.835      5.172   7.509 4.72e-12 ***
## SEMANA29              42.652      5.055   8.438 2.35e-14 ***
## SEMANA30              42.951      5.185   8.284 5.75e-14 ***
## SEMANA31              46.063      5.827   7.906 5.05e-13 ***
## SEMANA32              42.100      5.718   7.363 1.06e-11 ***
## SEMANA33              43.670      5.185   8.423 2.57e-14 ***
## SEMANA34              37.789      5.317   7.108 4.29e-11 ***
## SEMANA35              42.946      5.172   8.303 5.15e-14 ***
## SEMANA36              39.863      5.172   7.707 1.55e-12 ***
## SEMANA37              46.679      5.172   9.025 7.42e-16 ***
## SEMANA38              46.679      5.172   9.025 7.42e-16 ***
## SEMANA39              38.150      5.172   7.376 9.85e-12 ***
## SEMANA40              46.029      5.172   8.899 1.57e-15 ***
## SEMANA41              32.292      5.172   6.244 4.07e-09 ***
## SEMANA42              39.370      5.434   7.246 2.02e-11 ***
## SEMANA43              40.068      5.172   7.747 1.24e-12 ***
## SEMANA44              39.041      5.172   7.548 3.78e-12 ***
## SEMANA45              33.787      5.434   6.218 4.63e-09 ***
## SEMANA46              40.148      5.233   7.672 1.89e-12 ***
## SEMANA47              38.640      5.233   7.384 9.43e-12 ***
## SEMANA48              38.390      5.172   7.422 7.62e-12 ***
## SEMANA49              40.969      5.015   8.169 1.12e-13 ***
## SEMANA50              41.723      5.070   8.229 7.92e-14 ***
## SEMANA51              46.924      5.055   9.283  < 2e-16 ***
## SEMANA52              25.736      5.076   5.070 1.14e-06 ***
## SEMANA53             -23.512      4.364  -5.388 2.67e-07 ***
## Feria_Flores_Semana   13.437      5.372   2.502 0.013424 *  
## Semana_Santa_Semana  -23.010      6.530  -3.524 0.000562 ***
## Feriados_Lunes       -18.830      5.163  -3.647 0.000364 ***
## Feriados_Otros       -17.145      7.176  -2.389 0.018113 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 51.76 on 152 degrees of freedom
## Multiple R-squared:  0.8114, Adjusted R-squared:  0.7406 
## F-statistic: 11.47 on 57 and 152 DF,  p-value: < 2.2e-16
caret_lm_fit_s
## Linear Regression 
## 
## 210 samples
##   6 predictor
## 
## Pre-processing: centered (57), scaled (57) 
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 189, 189, 188, 189, 190, 189, ... 
## Resampling results:
## 
##   RMSE      Rsquared   MAE     
##   62.60188  0.5837995  46.87437
## 
## Tuning parameter 'intercept' was held constant at a value of TRUE

Calculo MSE y RMSE para los datos de entrenamiento

y_tr_pred_lm_s<-predict(caret_lm_fit_s,Train_S_Dataset[,c("Ano_Base","SEMANA","Feria_Flores_Semana","Semana_Santa_Semana","Feriados_Lunes","Feriados_Otros")])
mse_tr_lm_s<-mean((Train_S_Dataset$TOTAL_ACCIDENTES-y_tr_pred_lm_s)^2) # calcula el mse de entrenamiento
RMSE_tr_lm_s = sqrt(mse_tr_lm_s)
mse_tr_lm_s
## [1] 1939.128
RMSE_tr_lm_s
## [1] 44.03553

Calculo MSE y RMSE para los datos de validación

y_test_pred_lm_s<-predict(caret_lm_fit_s,Test_S_Dataset[,c("Ano_Base","SEMANA","Feria_Flores_Semana","Semana_Santa_Semana","Feriados_Lunes","Feriados_Otros")])
mse_test_lm_s<-mean((Test_S_Dataset$TOTAL_ACCIDENTES-y_test_pred_lm_s)^2) # calcula el mse de entrenamiento
RMSE_test_lm_s = sqrt(mse_test_lm_s)
mse_test_lm_s
## [1] 4053.676
RMSE_test_lm_s
## [1] 63.66849

Predicción en la muestra

library(plotly)
## Warning: package 'plotly' was built under R version 3.5.3
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:reshape':
## 
##     rename
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
plot_ly (data=Train_S_Dataset,
         x = ~Ano_Sem,
         y = ~TOTAL_ACCIDENTES,
         type = "scatter" ,mode = "lines",
         name='Real',
         line=list(width=1,color='rgb(205, 12, 24)'))%>%
  add_trace(y= ~y_tr_pred_lm_s,
            name='Modelo lm',
            line=list(width=1,color='rgb(22, 96, 167)'))%>%
  layout(title='Total accidentes',
         xaxis=list(title="Fecha"),
         yaxis=list(title="Accidentes"),
         legend = list(x = 1, y = 0.4))

Gráfica serie 2018

plot_ly (data=Test_S_Dataset,
         x = ~Ano_Sem,
         y = ~TOTAL_ACCIDENTES,
         type = "scatter" ,mode = "lines",
         name='Real',
         line=list(width=1,color='rgb(205, 12, 24)'))%>%
  add_trace(y= ~y_test_pred_lm_s,
            name='Modelo lm',
            line=list(width=1,color='rgb(22, 96, 167)'))%>%
  layout(title='Total accidentes',
         xaxis=list(title="Fecha"),
         yaxis=list(title="Accidentes"),
         legend = list(x = 1, y = 0.4))

Accidentes Graves

trcntrl = trainControl(method="cv", number=10)
caret_lm_fit_s_m = caret::train(ACCIDENTES_GRAVES∼Ano_Base+Ano_Base+SEMANA+Feria_Flores_Semana+Semana_Santa_Semana+Feriados_Lunes+Feriados_Otros, data=Train_S_Dataset,
                      method = "lm", trControl = trcntrl,
                      preProcess=c("center", "scale"),
                      tuneLength = 10)
summary(caret_lm_fit_s_m)
## 
## Call:
## lm(formula = .outcome ~ ., data = dat)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -122.630  -20.549    3.016   22.611   88.370 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          448.462      2.433 184.298  < 2e-16 ***
## Ano_Base               2.305      2.441   0.944 0.346661    
## SEMANA02               4.765      3.622   1.316 0.190261    
## SEMANA03              14.633      3.532   4.143 5.67e-05 ***
## SEMANA04              12.219      3.524   3.468 0.000683 ***
## SEMANA05              16.946      3.524   4.809 3.61e-06 ***
## SEMANA06              20.612      3.524   5.850 2.92e-08 ***
## SEMANA07              17.837      3.524   5.062 1.18e-06 ***
## SEMANA08              19.481      3.524   5.529 1.37e-07 ***
## SEMANA09              17.940      3.524   5.091 1.04e-06 ***
## SEMANA10              21.982      3.524   6.238 4.18e-09 ***
## SEMANA11              21.674      3.524   6.151 6.51e-09 ***
## SEMANA12              19.114      3.626   5.272 4.56e-07 ***
## SEMANA13              19.136      3.565   5.368 2.93e-07 ***
## SEMANA14              21.308      3.593   5.931 1.96e-08 ***
## SEMANA15              18.979      3.593   5.282 4.35e-07 ***
## SEMANA16              18.088      3.593   5.034 1.34e-06 ***
## SEMANA17              20.034      3.444   5.818 3.41e-08 ***
## SEMANA18              18.651      3.429   5.439 2.10e-07 ***
## SEMANA19              19.120      3.532   5.413 2.36e-07 ***
## SEMANA20              19.070      3.524   5.412 2.38e-07 ***
## SEMANA21              19.155      3.532   5.423 2.26e-07 ***
## SEMANA22              17.013      3.565   4.772 4.25e-06 ***
## SEMANA23              21.260      3.565   5.963 1.66e-08 ***
## SEMANA24              18.641      3.532   5.278 4.44e-07 ***
## SEMANA25              15.266      3.565   4.282 3.27e-05 ***
## SEMANA26              10.847      3.565   3.042 0.002765 ** 
## SEMANA27              16.873      3.702   4.558 1.05e-05 ***
## SEMANA28              18.419      3.524   5.227 5.59e-07 ***
## SEMANA29              19.564      3.444   5.681 6.61e-08 ***
## SEMANA30              19.669      3.532   5.568 1.14e-07 ***
## SEMANA31              21.456      3.970   5.405 2.45e-07 ***
## SEMANA32              18.155      3.895   4.660 6.85e-06 ***
## SEMANA33              20.799      3.532   5.888 2.41e-08 ***
## SEMANA34              18.776      3.622   5.184 6.83e-07 ***
## SEMANA35              23.455      3.524   6.656 4.80e-10 ***
## SEMANA36              18.145      3.524   5.150 7.97e-07 ***
## SEMANA37              22.770      3.524   6.462 1.33e-09 ***
## SEMANA38              21.845      3.524   6.199 5.09e-09 ***
## SEMANA39              19.104      3.524   5.422 2.27e-07 ***
## SEMANA40              20.646      3.524   5.859 2.78e-08 ***
## SEMANA41              12.562      3.524   3.565 0.000487 ***
## SEMANA42              18.449      3.702   4.984 1.68e-06 ***
## SEMANA43              18.180      3.524   5.159 7.63e-07 ***
## SEMANA44              16.227      3.524   4.605 8.65e-06 ***
## SEMANA45              15.161      3.702   4.096 6.82e-05 ***
## SEMANA46              16.019      3.565   4.493 1.38e-05 ***
## SEMANA47              16.019      3.565   4.493 1.38e-05 ***
## SEMANA48              14.994      3.524   4.255 3.64e-05 ***
## SEMANA49              14.730      3.416   4.311 2.91e-05 ***
## SEMANA50              15.392      3.454   4.456 1.61e-05 ***
## SEMANA51              18.356      3.444   5.330 3.48e-07 ***
## SEMANA52              12.044      3.458   3.483 0.000648 ***
## SEMANA53             -15.820      2.973  -5.321 3.63e-07 ***
## Feria_Flores_Semana    7.089      3.660   1.937 0.054571 .  
## Semana_Santa_Semana  -13.709      4.449  -3.082 0.002445 ** 
## Feriados_Lunes        -7.126      3.518  -2.026 0.044534 *  
## Feriados_Otros        -6.658      4.889  -1.362 0.175272    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 35.26 on 152 degrees of freedom
## Multiple R-squared:  0.7116, Adjusted R-squared:  0.6034 
## F-statistic: 6.579 on 57 and 152 DF,  p-value: < 2.2e-16
caret_lm_fit_s_m
## Linear Regression 
## 
## 210 samples
##   6 predictor
## 
## Pre-processing: centered (57), scaled (57) 
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 187, 189, 188, 190, 189, 189, ... 
## Resampling results:
## 
##   RMSE      Rsquared   MAE     
##   41.22433  0.4038684  33.51642
## 
## Tuning parameter 'intercept' was held constant at a value of TRUE

Calculo MSE y RMSE para los datos de entrenamiento

y_tr_pred_lm_s_m<-predict(caret_lm_fit_s_m,Train_S_Dataset[,c("Ano_Base","SEMANA","Feria_Flores_Semana","Semana_Santa_Semana","Feriados_Lunes","Feriados_Otros")])
mse_tr_lm_s_m<-mean((Train_S_Dataset$ACCIDENTES_GRAVES-y_tr_pred_lm_s_m)^2) # calcula el mse de entrenamiento
RMSE_tr_lm_s_m = sqrt(mse_tr_lm_s_m)
mse_tr_lm_s_m
## [1] 900.0193
RMSE_tr_lm_s_m
## [1] 30.00032

Calculo MSE y RMSE para los datos de validación

y_test_pred_lm_s_m<-predict(caret_lm_fit_s_m,Test_S_Dataset[,c("Ano_Base","SEMANA","Feria_Flores_Semana","Semana_Santa_Semana","Feriados_Lunes","Feriados_Otros")])
mse_test_lm_s_m<-mean((Test_S_Dataset$ACCIDENTES_GRAVES-y_test_pred_lm_s_m)^2) # calcula el mse de entrenamiento
RMSE_test_lm_s_m = sqrt(mse_test_lm_s_m)
mse_test_lm_s_m
## [1] 3135.736
RMSE_test_lm_s_m
## [1] 55.99764

Predicción en la muestra

plot_ly (data=Train_S_Dataset,
         x = ~Ano_Sem,
         y = ~ACCIDENTES_GRAVES,
         type = "scatter" ,mode = "lines",
         name='Real',
         line=list(width=1,color='rgb(205, 12, 24)'))%>%
  add_trace(y= ~y_tr_pred_lm_s_m,
            name='Modelo lm',
            line=list(width=1,color='rgb(22, 96, 167)'))%>%
  layout(title='Total accidentes graves',
         xaxis=list(title="Fecha"),
         yaxis=list(title="Accidentes graves"),
         legend = list(x = 1, y = 0.4))

Gráfica serie 2018

plot_ly (data=Test_S_Dataset,
         x = ~Ano_Sem,
         y = ~ACCIDENTES_GRAVES,
         type = "scatter" ,mode = "lines",
         split = ~ANO,
         name='Real',
         line=list(width=1,color='rgb(205, 12, 24)'))%>%
  add_trace(y= ~y_test_pred_lm_s_m,
            name='Modelo lm',
            line=list(width=1,color='rgb(22, 96, 167)'))%>%
  layout(title='Total accidentes graves',
         xaxis=list(title="Fecha"),
         yaxis=list(title="Accidentes graves"),
         legend = list(x = 1, y = 0.4))

Accidentes Leves

trcntrl = trainControl(method="cv", number=10)
caret_lm_fit_s_sd = caret::train(ACCIDENTES_LEVES∼Ano_Base+Ano_Base+SEMANA+Feria_Flores_Semana+Semana_Santa_Semana+Feriados_Lunes+Feriados_Otros, data=Train_S_Dataset,
                      method = "lm", trControl = trcntrl,
                      preProcess=c("center", "scale"),
                      tuneLength = 10)
summary(caret_lm_fit_s_sd)
## 
## Call:
## lm(formula = .outcome ~ ., data = dat)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -105.831  -15.978    0.582   17.520  103.169 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          356.671      2.090 170.673  < 2e-16 ***
## Ano_Base               6.183      2.097   2.949 0.003689 ** 
## SEMANA02              10.107      3.111   3.249 0.001424 ** 
## SEMANA03              15.575      3.033   5.134 8.54e-07 ***
## SEMANA04              18.600      3.026   6.147 6.66e-09 ***
## SEMANA05              18.292      3.026   6.045 1.11e-08 ***
## SEMANA06              19.285      3.026   6.373 2.10e-09 ***
## SEMANA07              24.458      3.026   8.082 1.84e-13 ***
## SEMANA08              19.868      3.026   6.565 7.74e-10 ***
## SEMANA09              20.690      3.026   6.837 1.84e-10 ***
## SEMANA10              23.910      3.026   7.901 5.18e-13 ***
## SEMANA11              21.443      3.026   7.086 4.82e-11 ***
## SEMANA12              20.630      3.114   6.625 5.65e-10 ***
## SEMANA13              20.120      3.062   6.571 7.50e-10 ***
## SEMANA14              23.289      3.086   7.548 3.80e-12 ***
## SEMANA15              24.591      3.086   7.969 3.51e-13 ***
## SEMANA16              18.391      3.086   5.960 1.69e-08 ***
## SEMANA17              25.485      2.957   8.618 8.25e-15 ***
## SEMANA18              25.475      2.945   8.650 6.81e-15 ***
## SEMANA19              23.111      3.033   7.619 2.55e-12 ***
## SEMANA20              23.430      3.026   7.743 1.27e-12 ***
## SEMANA21              21.947      3.033   7.235 2.14e-11 ***
## SEMANA22              19.367      3.062   6.325 2.68e-09 ***
## SEMANA23              21.628      3.062   7.064 5.45e-11 ***
## SEMANA24              21.159      3.033   6.975 8.79e-11 ***
## SEMANA25              18.373      3.062   6.001 1.38e-08 ***
## SEMANA26              16.695      3.062   5.453 1.97e-07 ***
## SEMANA27              19.209      3.179   6.042 1.12e-08 ***
## SEMANA28              20.416      3.026   6.746 2.98e-10 ***
## SEMANA29              23.088      2.957   7.807 8.85e-13 ***
## SEMANA30              23.283      3.033   7.675 1.86e-12 ***
## SEMANA31              24.607      3.409   7.218 2.35e-11 ***
## SEMANA32              23.945      3.345   7.158 3.27e-11 ***
## SEMANA33              22.871      3.033   7.540 3.97e-12 ***
## SEMANA34              19.014      3.111   6.112 7.90e-09 ***
## SEMANA35              19.491      3.026   6.441 1.48e-09 ***
## SEMANA36              21.717      3.026   7.177 2.95e-11 ***
## SEMANA37              23.910      3.026   7.901 5.18e-13 ***
## SEMANA38              24.835      3.026   8.207 9.00e-14 ***
## SEMANA39              19.046      3.026   6.294 3.15e-09 ***
## SEMANA40              25.383      3.026   8.388 3.15e-14 ***
## SEMANA41              19.731      3.026   6.520 9.80e-10 ***
## SEMANA42              20.921      3.179   6.581 7.13e-10 ***
## SEMANA43              21.889      3.026   7.233 2.16e-11 ***
## SEMANA44              22.814      3.026   7.539 3.99e-12 ***
## SEMANA45              18.626      3.179   5.859 2.79e-08 ***
## SEMANA46              24.128      3.062   7.880 5.83e-13 ***
## SEMANA47              22.621      3.062   7.388 9.21e-12 ***
## SEMANA48              23.396      3.026   7.731 1.36e-12 ***
## SEMANA49              26.239      2.934   8.943 1.21e-15 ***
## SEMANA50              26.331      2.967   8.876 1.80e-15 ***
## SEMANA51              28.568      2.957   9.660  < 2e-16 ***
## SEMANA52              13.691      2.970   4.610 8.48e-06 ***
## SEMANA53              -7.691      2.553  -3.012 0.003039 ** 
## Feria_Flores_Semana    6.348      3.143   2.020 0.045160 *  
## Semana_Santa_Semana   -9.301      3.821  -2.434 0.016075 *  
## Feriados_Lunes       -11.704      3.021  -3.874 0.000159 ***
## Feriados_Otros       -10.487      4.199  -2.498 0.013564 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 30.28 on 152 degrees of freedom
## Multiple R-squared:  0.7706, Adjusted R-squared:  0.6846 
## F-statistic:  8.96 on 57 and 152 DF,  p-value: < 2.2e-16
caret_lm_fit_s_sd
## Linear Regression 
## 
## 210 samples
##   6 predictor
## 
## Pre-processing: centered (57), scaled (57) 
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 187, 190, 189, 189, 188, 189, ... 
## Resampling results:
## 
##   RMSE     Rsquared   MAE     
##   37.4432  0.5530664  28.47099
## 
## Tuning parameter 'intercept' was held constant at a value of TRUE

Calculo MSE y RMSE para los datos de entrenamiento

y_tr_pred_lm_s_sd<-predict(caret_lm_fit_s_sd,Train_S_Dataset[,c("Ano_Base","SEMANA","Feria_Flores_Semana","Semana_Santa_Semana","Feriados_Lunes","Feriados_Otros")])
mse_tr_lm_s_sd<-mean((Train_S_Dataset$ACCIDENTES_LEVES-y_tr_pred_lm_s_sd)^2) # calcula el mse de entrenamiento
RMSE_tr_lm_s_sd = sqrt(mse_tr_lm_s_sd)
mse_tr_lm_s_sd
## [1] 663.8167
RMSE_tr_lm_s_sd
## [1] 25.76464

Calculo MSE y RMSE para los datos de validación

y_test_pred_lm_s_sd<-predict(caret_lm_fit_s_sd,Test_S_Dataset[,c("Ano_Base","SEMANA","Feria_Flores_Semana","Semana_Santa_Semana","Feriados_Lunes","Feriados_Otros")])
mse_test_lm_s_sd<-mean((Test_S_Dataset$ACCIDENTES_LEVES-y_test_pred_lm_s_sd)^2) # calcula el mse de entrenamiento
RMSE_test_lm_s_sd = sqrt(mse_test_lm_s_sd)
mse_test_lm_s_sd
## [1] 1110.178
RMSE_test_lm_s_sd
## [1] 33.31933

Predicción en la muestra

plot_ly (data=Train_S_Dataset,
         x = ~Ano_Sem,
         y = ~ACCIDENTES_LEVES,
         type = "scatter" ,mode = "lines",
         name='Real',
         line=list(width=1,color='rgb(205, 12, 24)'))%>%
  add_trace(y= ~y_tr_pred_lm_s_sd,
            name='Modelo lm',
            line=list(width=1,color='rgb(22, 96, 167)'))%>%
  layout(title='Total Accidentes leves',
         xaxis=list(title="Fecha"),
         yaxis=list(title="Accidentes leves"),
         legend = list(x = 0.75, y = 0.9))

Gráfica serie 2018

plot_ly (data=Test_S_Dataset,
         x = ~Ano_Sem,
         y = ~ACCIDENTES_LEVES,
         type = "scatter" ,mode = "lines",
         name='Real',
         line=list(width=1,color='rgb(205, 12, 24)'))%>%
  add_trace(y= ~y_test_pred_lm_s_sd,
            name='Modelo lm',
            line=list(width=1,color='rgb(22, 96, 167)'))%>%
  layout(title='Total accidentes leves',
         xaxis=list(title="Fecha"),
         yaxis=list(title="Accidentes leves"),
         legend = list(x = 0.75, y = 0.9))

Resumen Modelos Regresión lineal para los diferentes tipos de accidente

Tipo_de_accidentes= c("Total Accidentes","Accidentes graves","Accidentes leves")
RMSE_Train_lm = round(c(RMSE_tr_lm_s,RMSE_tr_lm_s_m,RMSE_tr_lm_s_sd), 3)
RMSE_Test_lm = round(c(RMSE_test_lm_s,RMSE_test_lm_s_m,RMSE_test_lm_s_sd),3)

Tabla_lm = data.frame (cbind(Tipo_de_accidentes,RMSE_Train_lm,RMSE_Test_lm))
Tabla_lm
##   Tipo_de_accidentes RMSE_Train_lm RMSE_Test_lm
## 1   Total Accidentes        44.036       63.668
## 2  Accidentes graves            30       55.998
## 3   Accidentes leves        25.765       33.319

2. KNN

head(Train_S_Dataset)
##    ANO SEMANA ACCIDENTES_GRAVES ACCIDENTES_LEVES TOTAL_ACCIDENTES Ano_Sem
## 1 2014     01               399              285              684 2014_01
## 2 2014     02               318              254              572 2014_02
## 3 2014     03               385              326              711 2014_03
## 4 2014     04               377              350              727 2014_04
## 5 2014     05               420              371              791 2014_05
## 6 2014     06               432              369              801 2014_06
##   Ano_Base Feria_Flores_Semana Semana_Santa_Semana Feriados_Lunes
## 1        0                   0                   0              0
## 2        0                   0                   0              1
## 3        0                   0                   0              0
## 4        0                   0                   0              0
## 5        0                   0                   0              0
## 6        0                   0                   0              0
##   Feriados_Otros
## 1              1
## 2              0
## 3              0
## 4              0
## 5              0
## 6              0

Total Accidentes

library(caret)
trcntrl = trainControl(method="cv", number=10)
caret_knn_fit_s = caret::train(TOTAL_ACCIDENTES∼Ano_Base+SEMANA+Feria_Flores_Semana+Semana_Santa_Semana+Feriados_Lunes+Feriados_Otros, data=Train_S_Dataset,
                      method = "knn", trControl = trcntrl,
                      preProcess=c("center", "scale"),
                      tuneLength = 10)
summary(caret_knn_fit_s)
##             Length Class      Mode     
## learn        2     -none-     list     
## k            1     -none-     numeric  
## theDots      0     -none-     list     
## xNames      57     -none-     character
## problemType  1     -none-     character
## tuneValue    1     data.frame list     
## obsLevels    1     -none-     logical  
## param        0     -none-     list
caret_knn_fit_s
## k-Nearest Neighbors 
## 
## 210 samples
##   6 predictor
## 
## Pre-processing: centered (57), scaled (57) 
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 189, 189, 189, 189, 189, 189, ... 
## Resampling results across tuning parameters:
## 
##   k   RMSE       Rsquared   MAE      
##    5  159.87102  0.1188231  141.52602
##    7  132.51903  0.1583542  107.30246
##    9   87.62084  0.2976933   62.50242
##   11   88.67186  0.2735668   62.98123
##   13   89.06752  0.2630701   63.05815
##   15   88.76619  0.2532501   62.54331
##   17   87.75910  0.2655133   61.60049
##   19   89.37957  0.2236102   61.68659
##   21   88.91507  0.2333045   60.61590
##   23   90.36976  0.1946510   61.38025
## 
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was k = 9.

Calculo MSE y RMSE para los datos de entrenamiento

y_tr_pred_knn_s<-predict(caret_knn_fit_s,Train_S_Dataset[,c("Ano_Base","SEMANA","Feria_Flores_Semana","Semana_Santa_Semana","Feriados_Lunes","Feriados_Otros")])
mse_tr_knn_s<-mean((Train_S_Dataset$TOTAL_ACCIDENTES-y_tr_pred_knn_s)^2) # calcula el mse de entrenamiento
RMSE_tr_knn_s = sqrt(mse_tr_knn_s)
mse_tr_knn_s
## [1] 7689.113
RMSE_tr_knn_s
## [1] 87.68759

Calculo MSE y RMSE para los datos de validación

y_test_pred_knn_s<-predict(caret_knn_fit_s,Test_S_Dataset[,c("Ano_Base","SEMANA","Feria_Flores_Semana","Semana_Santa_Semana","Feriados_Lunes","Feriados_Otros")])
mse_test_knn_s<-mean((Test_S_Dataset$TOTAL_ACCIDENTES-y_test_pred_knn_s)^2) # calcula el mse de entrenamiento
RMSE_test_knn_s = sqrt(mse_test_knn_s)
mse_test_knn_s
## [1] 2811.723
RMSE_test_knn_s
## [1] 53.02568

Predicción en la muestra

plot_ly (data=Train_S_Dataset,
         x = ~Ano_Sem,
         y = ~TOTAL_ACCIDENTES,
         type = "scatter" ,mode = "lines",
         name='Real',
         line=list(width=1,color='rgb(205, 12, 24)'))%>%
  add_trace(y= ~y_tr_pred_knn_s,
            name='Modelo knn',
            line=list(width=1,color='rgb(22, 96, 167)'))%>%
  layout(title='Total Accidentes',
         xaxis=list(title="Fecha"),
         yaxis=list(title="Accidentes"),
         legend = list(x = 1, y = 0.4))

Gráfica serie 2018

plot_ly (data=Test_S_Dataset,
         x = ~Ano_Sem,
         y = ~TOTAL_ACCIDENTES,
         type = "scatter" ,mode = "lines",
         name='Real',
         line=list(width=1,color='rgb(205, 12, 24)'))%>%
  add_trace(y= ~y_test_pred_knn_s,
            name='Modelo knn',
            line=list(width=1,color='rgb(22, 96, 167)'))%>%
  layout(title='Total Accidentes',
         xaxis=list(title="Fecha"),
         yaxis=list(title="Accidentes"),
         legend = list(x = 1, y = 0.4))

Accidentes graves

trcntrl = trainControl(method="cv", number=10)
caret_knn_fit_s_m = caret::train(ACCIDENTES_GRAVES∼Ano_Base+SEMANA+Feria_Flores_Semana+Semana_Santa_Semana+Feriados_Lunes+Feriados_Otros, data=Train_S_Dataset,
                      method = "knn", trControl = trcntrl,
                      preProcess=c("center", "scale"),
                      tuneLength = 10)
summary(caret_knn_fit_s_m)
##             Length Class      Mode     
## learn        2     -none-     list     
## k            1     -none-     numeric  
## theDots      0     -none-     list     
## xNames      57     -none-     character
## problemType  1     -none-     character
## tuneValue    1     data.frame list     
## obsLevels    1     -none-     logical  
## param        0     -none-     list
caret_knn_fit_s_m
## k-Nearest Neighbors 
## 
## 210 samples
##   6 predictor
## 
## Pre-processing: centered (57), scaled (57) 
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 190, 189, 189, 189, 190, 188, ... 
## Resampling results across tuning parameters:
## 
##   k   RMSE      Rsquared   MAE     
##    5  73.35329  0.1595947  62.60777
##    7  75.04626  0.1755304  61.51550
##    9  53.01555  0.1578638  37.65618
##   11  52.23325  0.1804905  37.39030
##   13  51.74448  0.2025601  37.32592
##   15  51.96793  0.1817838  37.34017
##   17  52.20766  0.1573873  37.14202
##   19  51.82887  0.1802767  36.90829
##   21  52.39492  0.1498191  37.17210
##   23  52.56264  0.1543004  37.28509
## 
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was k = 13.

Calculo MSE y RMSE para los datos de entrenamiento

y_tr_pred_knn_s_m<-predict(caret_knn_fit_s_m,Train_S_Dataset[,c("Ano_Base","SEMANA","Feria_Flores_Semana","Semana_Santa_Semana","Feriados_Lunes","Feriados_Otros")])
mse_tr_knn_s_m<-mean((Train_S_Dataset$ACCIDENTES_GRAVES-y_tr_pred_knn_s_m)^2) # calcula el mse de entrenamiento
RMSE_tr_knn_s_m = sqrt(mse_tr_knn_s_m)
mse_tr_knn_s_m
## [1] 2548.135
RMSE_tr_knn_s_m
## [1] 50.47905

Calculo MSE y RMSE para los datos de validación

y_test_pred_knn_s_m<-predict(caret_knn_fit_s_m,Test_S_Dataset[,c("Ano_Base","SEMANA","Feria_Flores_Semana","Semana_Santa_Semana","Feriados_Lunes","Feriados_Otros")])
mse_test_knn_s_m<-mean((Test_S_Dataset$ACCIDENTES_GRAVES-y_test_pred_knn_s_m)^2) # calcula el mse de entrenamiento
RMSE_test_knn_s_m = sqrt(mse_test_knn_s_m)
mse_test_knn_s_m
## [1] 1990.944
RMSE_test_knn_s_m
## [1] 44.61999

Predicción en la muestra

plot_ly (data=Train_S_Dataset,
         x = ~Ano_Sem,
         y = ~ACCIDENTES_GRAVES,
         type = "scatter" ,mode = "lines",
         name='Real',
         line=list(width=1,color='rgb(205, 12, 24)'))%>%
  add_trace(y= ~y_tr_pred_knn_s_m,
            name='Modelo knn',
            line=list(width=1,color='rgb(22, 96, 167)'))%>%
  layout(title='Total Accidentes graves',
         xaxis=list(title="Fecha"),
         yaxis=list(title="Accidentes graves"),
         legend = list(x = 1, y = 0.4))

Gráfica serie 2018

plot_ly (data=Test_S_Dataset,
         x = ~Ano_Sem,
         y = ~ACCIDENTES_GRAVES,
         type = "scatter" ,mode = "lines",
         name='Real',
         line=list(width=1,color='rgb(205, 12, 24)'))%>%
  add_trace(y= ~y_test_pred_knn_s_m,
            name='Modelo knn',
            line=list(width=1,color='rgb(22, 96, 167)'))%>%
  layout(title='Total accidentes graves',
         xaxis=list(title="Fecha"),
         yaxis=list(title="Accidentes graves"),
         legend = list(x = 1, y = 0.4))

Accidentes Leves

trcntrl = trainControl(method="cv", number=10)
caret_knn_fit_s_sd = caret::train(ACCIDENTES_LEVES∼Ano_Base+SEMANA+Feria_Flores_Semana+Semana_Santa_Semana+Feriados_Lunes+Feriados_Otros, data=Train_S_Dataset,
                      method = "knn", trControl = trcntrl,
                      preProcess=c("center", "scale"),
                      tuneLength = 10)
summary(caret_knn_fit_s_sd)
##             Length Class      Mode     
## learn        2     -none-     list     
## k            1     -none-     numeric  
## theDots      0     -none-     list     
## xNames      57     -none-     character
## problemType  1     -none-     character
## tuneValue    1     data.frame list     
## obsLevels    1     -none-     logical  
## param        0     -none-     list
caret_knn_fit_s_sd
## k-Nearest Neighbors 
## 
## 210 samples
##   6 predictor
## 
## Pre-processing: centered (57), scaled (57) 
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 189, 189, 189, 189, 190, 189, ... 
## Resampling results across tuning parameters:
## 
##   k   RMSE      Rsquared   MAE     
##    5  85.05207  0.1484905  75.41969
##    7  76.04815  0.1885851  62.96637
##    9  48.81370  0.2628027  36.43669
##   11  47.95157  0.2475786  35.85214
##   13  47.91406  0.2392017  35.38373
##   15  48.16050  0.2135900  35.48826
##   17  47.92511  0.2208918  35.42153
##   19  47.34845  0.2220739  35.00565
##   21  47.32389  0.2199801  35.13971
##   23  47.43791  0.2101234  35.07661
## 
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was k = 21.

Calculo MSE y RMSE para los datos de entrenamiento

y_tr_pred_knn_s_sd<-predict(caret_knn_fit_s_sd,Train_S_Dataset[,c("Ano_Base","SEMANA","Feria_Flores_Semana","Semana_Santa_Semana","Feriados_Lunes","Feriados_Otros")])
mse_tr_knn_s_sd<-mean((Train_S_Dataset$ACCIDENTES_LEVES-y_tr_pred_knn_s_sd)^2) # calcula el mse de entrenamiento
RMSE_tr_knn_s_sd = sqrt(mse_tr_knn_s_sd)
mse_tr_knn_s_sd
## [1] 2514.099
RMSE_tr_knn_s_sd
## [1] 50.14079

Calculo MSE y RMSE para los datos de validación

y_test_pred_knn_s_sd<-predict(caret_knn_fit_s_sd,Test_S_Dataset[,c("Ano_Base","SEMANA","Feria_Flores_Semana","Semana_Santa_Semana","Feriados_Lunes","Feriados_Otros")])
mse_test_knn_s_sd<-mean((Test_S_Dataset$ACCIDENTES_LEVES-y_test_pred_knn_s_sd)^2) # calcula el mse de entrenamiento
RMSE_test_knn_s_sd = sqrt(mse_test_knn_s_sd)
mse_test_knn_s_sd
## [1] 1705.109
RMSE_test_knn_s_sd
## [1] 41.29297

Predicción en la muestra

plot_ly (data=Train_S_Dataset,
         x = ~Ano_Sem,
         y = ~ACCIDENTES_LEVES,
         type = "scatter" ,mode = "lines",
         name='Real',
         line=list(width=1,color='rgb(205, 12, 24)'))%>%
  add_trace(y= ~y_tr_pred_knn_s_sd,
            name='Modelo knn',
            line=list(width=1,color='rgb(22, 96, 167)'))%>%
  layout(title='Total Accidentes leves',
         xaxis=list(title="Fecha"),
         yaxis=list(title="Accidentes"),
         legend = list(x = 1, y = 0.4))

Gráfica serie 2018

plot_ly (data=Test_S_Dataset,
         x = ~Ano_Sem,
         y = ~ACCIDENTES_LEVES,
         type = "scatter" ,mode = "lines",
         name='Real',
         line=list(width=1,color='rgb(205, 12, 24)'))%>%
  add_trace(y= ~y_test_pred_knn_s_sd,
            name='Modelo knn',
            line=list(width=1,color='rgb(22, 96, 167)'))%>%
  layout(title='Accidentes leves',
         xaxis=list(title="Fecha"),
         yaxis=list(title="Accidentes"),
         legend = list(x = 1, y = 0.4))

Resumen Modelos KNN para los diferentes tipos de accidente

Tipo_de_accidentes= c("Total Accidentes","Accidentes graves","Accidentes leves")
RMSE_Train_knn = round(c(RMSE_tr_knn_s,RMSE_tr_knn_s_m,RMSE_tr_knn_s_sd), 3)
RMSE_Test_knn = round(c(RMSE_test_knn_s,RMSE_test_knn_s_m,RMSE_test_knn_s_sd),3)

Tabla_knn = data.frame (cbind(Tipo_de_accidentes,RMSE_Train_knn,RMSE_Test_knn))
Tabla_knn
##   Tipo_de_accidentes RMSE_Train_knn RMSE_Test_knn
## 1   Total Accidentes         87.688        53.026
## 2  Accidentes graves         50.479         44.62
## 3   Accidentes leves         50.141        41.293

3. MODELO LINEAL GENERALIZADO

Total Accidentes

glm_fit_s<-glm(TOTAL_ACCIDENTES∼Ano_Base+SEMANA+Feria_Flores_Semana+Semana_Santa_Semana+Feriados_Lunes+Feriados_Otros, data=Train_S_Dataset, family = "poisson")
summary(glm_fit_s)
## 
## Call:
## glm(formula = TOTAL_ACCIDENTES ~ Ano_Base + SEMANA + Feria_Flores_Semana + 
##     Semana_Santa_Semana + Feriados_Lunes + Feriados_Otros, family = "poisson", 
##     data = Train_S_Dataset)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -11.6985   -0.9555    0.1415    0.9726    7.6856  
## 
## Coefficients:
##                      Estimate Std. Error z value Pr(>|z|)    
## (Intercept)          6.277825   0.022888 274.290  < 2e-16 ***
## Ano_Base             0.020785   0.004869   4.269 1.97e-05 ***
## SEMANA02             0.183999   0.030950   5.945 2.76e-09 ***
## SEMANA03             0.351425   0.029174  12.046  < 2e-16 ***
## SEMANA04             0.358119   0.029008  12.345  < 2e-16 ***
## SEMANA05             0.399148   0.028782  13.868  < 2e-16 ***
## SEMANA06             0.440656   0.028561  15.428  < 2e-16 ***
## SEMANA07             0.461367   0.028454  16.215  < 2e-16 ***
## SEMANA08             0.435861   0.028586  15.247  < 2e-16 ***
## SEMANA09             0.429533   0.028620  15.008  < 2e-16 ***
## SEMANA10             0.491650   0.028300  17.373  < 2e-16 ***
## SEMANA11             0.468370   0.028418  16.482  < 2e-16 ***
## SEMANA12             0.436869   0.029664  14.727  < 2e-16 ***
## SEMANA13             0.435725   0.028991  15.030  < 2e-16 ***
## SEMANA14             0.483469   0.029074  16.629  < 2e-16 ***
## SEMANA15             0.475855   0.029142  16.329  < 2e-16 ***
## SEMANA16             0.407165   0.029494  13.805  < 2e-16 ***
## SEMANA17             0.490445   0.027860  17.604  < 2e-16 ***
## SEMANA18             0.481088   0.027977  17.196  < 2e-16 ***
## SEMANA19             0.461818   0.028578  16.160  < 2e-16 ***
## SEMANA20             0.463122   0.028445  16.281  < 2e-16 ***
## SEMANA21             0.451666   0.028626  15.778  < 2e-16 ***
## SEMANA22             0.410022   0.029146  14.068  < 2e-16 ***
## SEMANA23             0.468283   0.028828  16.244  < 2e-16 ***
## SEMANA24             0.440203   0.028687  15.345  < 2e-16 ***
## SEMANA25             0.383990   0.029281  13.114  < 2e-16 ***
## SEMANA26             0.324261   0.029621  10.947  < 2e-16 ***
## SEMANA27             0.407123   0.030244  13.461  < 2e-16 ***
## SEMANA28             0.431345   0.028610  15.077  < 2e-16 ***
## SEMANA29             0.468959   0.028163  16.651  < 2e-16 ***
## SEMANA30             0.467733   0.028543  16.387  < 2e-16 ***
## SEMANA31             0.491210   0.031121  15.784  < 2e-16 ***
## SEMANA32             0.463948   0.030715  15.105  < 2e-16 ***
## SEMANA33             0.474215   0.028514  16.631  < 2e-16 ***
## SEMANA34             0.422856   0.029519  14.325  < 2e-16 ***
## SEMANA35             0.466915   0.028425  16.426  < 2e-16 ***
## SEMANA36             0.440357   0.028563  15.417  < 2e-16 ***
## SEMANA37             0.498163   0.028267  17.623  < 2e-16 ***
## SEMANA38             0.498163   0.028267  17.623  < 2e-16 ***
## SEMANA39             0.425292   0.028642  14.848  < 2e-16 ***
## SEMANA40             0.492786   0.028294  17.416  < 2e-16 ***
## SEMANA41             0.371983   0.028931  12.858  < 2e-16 ***
## SEMANA42             0.438123   0.030078  14.566  < 2e-16 ***
## SEMANA43             0.442149   0.028553  15.485  < 2e-16 ***
## SEMANA44             0.433154   0.028601  15.145  < 2e-16 ***
## SEMANA45             0.384904   0.030365  12.676  < 2e-16 ***
## SEMANA46             0.444347   0.028962  15.342  < 2e-16 ***
## SEMANA47             0.430144   0.029021  14.822  < 2e-16 ***
## SEMANA48             0.427414   0.028631  14.928  < 2e-16 ***
## SEMANA49             0.452770   0.027968  16.189  < 2e-16 ***
## SEMANA50             0.458286   0.028171  16.268  < 2e-16 ***
## SEMANA51             0.502208   0.027800  18.065  < 2e-16 ***
## SEMANA52             0.303926   0.029276  10.381  < 2e-16 ***
## SEMANA53            -0.618838   0.047479 -13.034  < 2e-16 ***
## Feria_Flores_Semana  0.114436   0.025566   4.476 7.60e-06 ***
## Semana_Santa_Semana -0.249251   0.034801  -7.162 7.94e-13 ***
## Feriados_Lunes      -0.059120   0.008736  -6.767 1.31e-11 ***
## Feriados_Otros      -0.058858   0.012967  -4.539 5.65e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 3138.71  on 209  degrees of freedom
## Residual deviance:  636.17  on 152  degrees of freedom
## AIC: 2540.8
## 
## Number of Fisher Scoring iterations: 4
glm_fit_s
## 
## Call:  glm(formula = TOTAL_ACCIDENTES ~ Ano_Base + SEMANA + Feria_Flores_Semana + 
##     Semana_Santa_Semana + Feriados_Lunes + Feriados_Otros, family = "poisson", 
##     data = Train_S_Dataset)
## 
## Coefficients:
##         (Intercept)             Ano_Base             SEMANA02  
##             6.27783              0.02079              0.18400  
##            SEMANA03             SEMANA04             SEMANA05  
##             0.35142              0.35812              0.39915  
##            SEMANA06             SEMANA07             SEMANA08  
##             0.44066              0.46137              0.43586  
##            SEMANA09             SEMANA10             SEMANA11  
##             0.42953              0.49165              0.46837  
##            SEMANA12             SEMANA13             SEMANA14  
##             0.43687              0.43573              0.48347  
##            SEMANA15             SEMANA16             SEMANA17  
##             0.47585              0.40716              0.49045  
##            SEMANA18             SEMANA19             SEMANA20  
##             0.48109              0.46182              0.46312  
##            SEMANA21             SEMANA22             SEMANA23  
##             0.45167              0.41002              0.46828  
##            SEMANA24             SEMANA25             SEMANA26  
##             0.44020              0.38399              0.32426  
##            SEMANA27             SEMANA28             SEMANA29  
##             0.40712              0.43134              0.46896  
##            SEMANA30             SEMANA31             SEMANA32  
##             0.46773              0.49121              0.46395  
##            SEMANA33             SEMANA34             SEMANA35  
##             0.47421              0.42286              0.46691  
##            SEMANA36             SEMANA37             SEMANA38  
##             0.44036              0.49816              0.49816  
##            SEMANA39             SEMANA40             SEMANA41  
##             0.42529              0.49279              0.37198  
##            SEMANA42             SEMANA43             SEMANA44  
##             0.43812              0.44215              0.43315  
##            SEMANA45             SEMANA46             SEMANA47  
##             0.38490              0.44435              0.43014  
##            SEMANA48             SEMANA49             SEMANA50  
##             0.42741              0.45277              0.45829  
##            SEMANA51             SEMANA52             SEMANA53  
##             0.50221              0.30393             -0.61884  
## Feria_Flores_Semana  Semana_Santa_Semana       Feriados_Lunes  
##             0.11444             -0.24925             -0.05912  
##      Feriados_Otros  
##            -0.05886  
## 
## Degrees of Freedom: 209 Total (i.e. Null);  152 Residual
## Null Deviance:       3139 
## Residual Deviance: 636.2     AIC: 2541

Calculo MSE y RMSE para los datos de entrenamiento

y_tr_pred_glm_s<-predict(glm_fit_s,Train_S_Dataset[,c("Ano_Base","SEMANA","Feria_Flores_Semana","Semana_Santa_Semana","Feriados_Lunes","Feriados_Otros")],type="response")
mse_tr_glm_s<-mean((Train_S_Dataset$TOTAL_ACCIDENTES-y_tr_pred_glm_s)^2) # calcula el mse de entrenamiento
RMSE_tr_glm_s = sqrt(mse_tr_glm_s)
mse_tr_glm_s
## [1] 1940.939
RMSE_tr_glm_s
## [1] 44.05609

Calculo MSE y RMSE para los datos de validación

y_test_pred_glm_s<-predict(glm_fit_s,Test_S_Dataset[,c("Ano_Base","SEMANA","Feria_Flores_Semana","Semana_Santa_Semana","Feriados_Lunes","Feriados_Otros")],type="response")
mse_test_glm_s<-mean((Train_S_Dataset$TOTAL_ACCIDENTES-y_test_pred_glm_s)^2) # calcula el mse de entrenamiento
## Warning in Train_S_Dataset$TOTAL_ACCIDENTES - y_test_pred_glm_s: longitud
## de objeto mayor no es múltiplo de la longitud de uno menor
RMSE_test_glm_s = sqrt(mse_test_glm_s)
mse_test_glm_s
## [1] 7321.709
RMSE_test_glm_s
## [1] 85.56698

Predicción en la muestra

plot_ly (data=Train_S_Dataset,
         x = ~Ano_Sem,
         y = ~TOTAL_ACCIDENTES,
         type = "scatter" ,mode = "lines",
         name='Real',
         line=list(width=1,color='rgb(205, 12, 24)'))%>%
  add_trace(y= ~y_tr_pred_glm_s,
            name='Modelo glm',
            line=list(width=1,color='rgb(22, 96, 167)'))%>%
  layout(title='Total accidentes',
         xaxis=list(title="Fecha"),
         yaxis=list(title="Accidentes"),
         legend = list(x = 1, y = 0.4))

Gráfica serie 2018

plot_ly (data=Test_S_Dataset,
         x = ~Ano_Sem,
         y = ~TOTAL_ACCIDENTES,
         type = "scatter" ,mode = "lines",
         name='Real',
         line=list(width=1,color='rgb(205, 12, 24)'))%>%
  add_trace(y= ~y_test_pred_glm_s,
            name='Modelo glm',
            line=list(width=1,color='rgb(22, 96, 167)'))%>%
  layout(title='Total Accidentes',
         xaxis=list(title="Fecha"),
         yaxis=list(title="Accidentes"),
         legend = list(x = 1, y = 0.4))

Accidentes Graves

glm_fit_s_m<-glm(ACCIDENTES_GRAVES∼Ano_Base+SEMANA+Feria_Flores_Semana+Semana_Santa_Semana+Feriados_Lunes+Feriados_Otros, data=Train_S_Dataset, family = "poisson")
summary(glm_fit_s)
## 
## Call:
## glm(formula = TOTAL_ACCIDENTES ~ Ano_Base + SEMANA + Feria_Flores_Semana + 
##     Semana_Santa_Semana + Feriados_Lunes + Feriados_Otros, family = "poisson", 
##     data = Train_S_Dataset)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -11.6985   -0.9555    0.1415    0.9726    7.6856  
## 
## Coefficients:
##                      Estimate Std. Error z value Pr(>|z|)    
## (Intercept)          6.277825   0.022888 274.290  < 2e-16 ***
## Ano_Base             0.020785   0.004869   4.269 1.97e-05 ***
## SEMANA02             0.183999   0.030950   5.945 2.76e-09 ***
## SEMANA03             0.351425   0.029174  12.046  < 2e-16 ***
## SEMANA04             0.358119   0.029008  12.345  < 2e-16 ***
## SEMANA05             0.399148   0.028782  13.868  < 2e-16 ***
## SEMANA06             0.440656   0.028561  15.428  < 2e-16 ***
## SEMANA07             0.461367   0.028454  16.215  < 2e-16 ***
## SEMANA08             0.435861   0.028586  15.247  < 2e-16 ***
## SEMANA09             0.429533   0.028620  15.008  < 2e-16 ***
## SEMANA10             0.491650   0.028300  17.373  < 2e-16 ***
## SEMANA11             0.468370   0.028418  16.482  < 2e-16 ***
## SEMANA12             0.436869   0.029664  14.727  < 2e-16 ***
## SEMANA13             0.435725   0.028991  15.030  < 2e-16 ***
## SEMANA14             0.483469   0.029074  16.629  < 2e-16 ***
## SEMANA15             0.475855   0.029142  16.329  < 2e-16 ***
## SEMANA16             0.407165   0.029494  13.805  < 2e-16 ***
## SEMANA17             0.490445   0.027860  17.604  < 2e-16 ***
## SEMANA18             0.481088   0.027977  17.196  < 2e-16 ***
## SEMANA19             0.461818   0.028578  16.160  < 2e-16 ***
## SEMANA20             0.463122   0.028445  16.281  < 2e-16 ***
## SEMANA21             0.451666   0.028626  15.778  < 2e-16 ***
## SEMANA22             0.410022   0.029146  14.068  < 2e-16 ***
## SEMANA23             0.468283   0.028828  16.244  < 2e-16 ***
## SEMANA24             0.440203   0.028687  15.345  < 2e-16 ***
## SEMANA25             0.383990   0.029281  13.114  < 2e-16 ***
## SEMANA26             0.324261   0.029621  10.947  < 2e-16 ***
## SEMANA27             0.407123   0.030244  13.461  < 2e-16 ***
## SEMANA28             0.431345   0.028610  15.077  < 2e-16 ***
## SEMANA29             0.468959   0.028163  16.651  < 2e-16 ***
## SEMANA30             0.467733   0.028543  16.387  < 2e-16 ***
## SEMANA31             0.491210   0.031121  15.784  < 2e-16 ***
## SEMANA32             0.463948   0.030715  15.105  < 2e-16 ***
## SEMANA33             0.474215   0.028514  16.631  < 2e-16 ***
## SEMANA34             0.422856   0.029519  14.325  < 2e-16 ***
## SEMANA35             0.466915   0.028425  16.426  < 2e-16 ***
## SEMANA36             0.440357   0.028563  15.417  < 2e-16 ***
## SEMANA37             0.498163   0.028267  17.623  < 2e-16 ***
## SEMANA38             0.498163   0.028267  17.623  < 2e-16 ***
## SEMANA39             0.425292   0.028642  14.848  < 2e-16 ***
## SEMANA40             0.492786   0.028294  17.416  < 2e-16 ***
## SEMANA41             0.371983   0.028931  12.858  < 2e-16 ***
## SEMANA42             0.438123   0.030078  14.566  < 2e-16 ***
## SEMANA43             0.442149   0.028553  15.485  < 2e-16 ***
## SEMANA44             0.433154   0.028601  15.145  < 2e-16 ***
## SEMANA45             0.384904   0.030365  12.676  < 2e-16 ***
## SEMANA46             0.444347   0.028962  15.342  < 2e-16 ***
## SEMANA47             0.430144   0.029021  14.822  < 2e-16 ***
## SEMANA48             0.427414   0.028631  14.928  < 2e-16 ***
## SEMANA49             0.452770   0.027968  16.189  < 2e-16 ***
## SEMANA50             0.458286   0.028171  16.268  < 2e-16 ***
## SEMANA51             0.502208   0.027800  18.065  < 2e-16 ***
## SEMANA52             0.303926   0.029276  10.381  < 2e-16 ***
## SEMANA53            -0.618838   0.047479 -13.034  < 2e-16 ***
## Feria_Flores_Semana  0.114436   0.025566   4.476 7.60e-06 ***
## Semana_Santa_Semana -0.249251   0.034801  -7.162 7.94e-13 ***
## Feriados_Lunes      -0.059120   0.008736  -6.767 1.31e-11 ***
## Feriados_Otros      -0.058858   0.012967  -4.539 5.65e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 3138.71  on 209  degrees of freedom
## Residual deviance:  636.17  on 152  degrees of freedom
## AIC: 2540.8
## 
## Number of Fisher Scoring iterations: 4
glm_fit_s_m
## 
## Call:  glm(formula = ACCIDENTES_GRAVES ~ Ano_Base + SEMANA + Feria_Flores_Semana + 
##     Semana_Santa_Semana + Feriados_Lunes + Feriados_Otros, family = "poisson", 
##     data = Train_S_Dataset)
## 
## Coefficients:
##         (Intercept)             Ano_Base             SEMANA02  
##            5.788318             0.009836             0.099524  
##            SEMANA03             SEMANA04             SEMANA05  
##            0.286121             0.245220             0.324299  
##            SEMANA06             SEMANA07             SEMANA08  
##            0.381579             0.338522             0.364261  
##            SEMANA09             SEMANA10             SEMANA11  
##            0.340150             0.402177             0.397580  
##            SEMANA12             SEMANA13             SEMANA14  
##            0.357368             0.359821             0.394598  
##            SEMANA15             SEMANA16             SEMANA17  
##            0.357023             0.341206             0.373905  
##            SEMANA18             SEMANA19             SEMANA20  
##            0.353534             0.359211             0.357888  
##            SEMANA21             SEMANA22             SEMANA23  
##            0.359651             0.325816             0.392989  
##            SEMANA24             SEMANA25             SEMANA26  
##            0.351563             0.296681             0.219235  
##            SEMANA27             SEMANA28             SEMANA29  
##            0.323757             0.347714             0.368714  
##            SEMANA30             SEMANA31             SEMANA32  
##            0.367675             0.392465             0.346583  
##            SEMANA33             SEMANA34             SEMANA35  
##            0.385200             0.354601             0.423857  
##            SEMANA36             SEMANA37             SEMANA38  
##            0.343399             0.413832             0.400137  
##            SEMANA39             SEMANA40             SEMANA41  
##            0.358420             0.382100             0.251165  
##            SEMANA42             SEMANA43             SEMANA44  
##            0.349806             0.343939             0.312661  
##            SEMANA45             SEMANA46             SEMANA47  
##            0.294651             0.309409             0.309212  
##            SEMANA48             SEMANA49             SEMANA50  
##            0.292390             0.288662             0.299181  
##            SEMANA51             SEMANA52             SEMANA53  
##            0.347609             0.240504            -0.703891  
## Feria_Flores_Semana  Semana_Santa_Semana       Feriados_Lunes  
##            0.108498            -0.256489            -0.039936  
##      Feriados_Otros  
##           -0.041126  
## 
## Degrees of Freedom: 209 Total (i.e. Null);  152 Residual
## Null Deviance:       1654 
## Residual Deviance: 458.8     AIC: 2241

Calculo MSE y RMSE para los datos de entrenamiento

y_tr_pred_glm_s_m<-predict(glm_fit_s_m,Train_S_Dataset[,c("Ano_Base","SEMANA","Feria_Flores_Semana","Semana_Santa_Semana","Feriados_Lunes","Feriados_Otros")],type="response")
mse_tr_glm_s_m<-mean((Train_S_Dataset$ACCIDENTES_GRAVES-y_tr_pred_glm_s_m)^2) # calcula el mse de entrenamiento
RMSE_tr_glm_s_m = sqrt(mse_tr_glm_s_m)
mse_tr_glm_s_m
## [1] 897.2075
RMSE_tr_glm_s_m
## [1] 29.95342

Calculo MSE y RMSE para los datos de validación

y_test_pred_glm_s_m<-predict(glm_fit_s_m,Test_S_Dataset[,c("Ano_Base","SEMANA","Feria_Flores_Semana","Semana_Santa_Semana","Feriados_Lunes","Feriados_Otros")],type="response")
mse_test_glm_s_m<-mean((Train_S_Dataset$ACCIDENTES_GRAVES-y_test_pred_glm_s_m)^2) # calcula el mse de entrenamiento
## Warning in Train_S_Dataset$ACCIDENTES_GRAVES - y_test_pred_glm_s_m:
## longitud de objeto mayor no es múltiplo de la longitud de uno menor
RMSE_test_glm_s_m = sqrt(mse_test_glm_s_m)
mse_test_glm_s_m
## [1] 2280.648
RMSE_test_glm_s_m
## [1] 47.75613

Predicción en la muestra

plot_ly (data=Train_S_Dataset,
         x = ~Ano_Sem,
         y = ~ACCIDENTES_GRAVES,
         type = "scatter" ,mode = "lines",
         name='Real',
         line=list(width=1,color='rgb(205, 12, 24)'))%>%
  add_trace(y= ~y_tr_pred_glm_s_m,
            name='Modelo glm',
            line=list(width=1,color='rgb(22, 96, 167)'))%>%
  layout(title='Total accidentes graves',
         xaxis=list(title="Fecha"),
         yaxis=list(title="Accidentes graves"),
         legend = list(x = 1, y = 0.4))

Gráfica serie 2018

plot_ly (data=Test_S_Dataset,
         x = ~Ano_Sem,
         y = ~ACCIDENTES_GRAVES,
         type = "scatter" ,mode = "lines",
         name='Real',
         line=list(width=1,color='rgb(205, 12, 24)'))%>%
  add_trace(y= ~y_test_pred_glm_s_m,
            name='Modelo glm',
            line=list(width=1,color='rgb(22, 96, 167)'))%>%
  layout(title='Total accidentes graves',
         xaxis=list(title="Fecha"),
         yaxis=list(title="Accidentes graves"),
         legend = list(x = 1, y = 0.4))

Accidentes Leves

glm_fit_s_sd<-glm(ACCIDENTES_LEVES∼Ano_Base+SEMANA+Feria_Flores_Semana+Semana_Santa_Semana+Feriados_Lunes+Feriados_Otros, data=Train_S_Dataset, family = "poisson")
summary(glm_fit_s_sd)
## 
## Call:
## glm(formula = ACCIDENTES_LEVES ~ Ano_Base + SEMANA + Feria_Flores_Semana + 
##     Semana_Santa_Semana + Feriados_Lunes + Feriados_Otros, family = "poisson", 
##     data = Train_S_Dataset)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -9.4088  -0.8213   0.0148   0.9198   6.4250  
## 
## Coefficients:
##                      Estimate Std. Error z value Pr(>|z|)    
## (Intercept)          5.327381   0.036599 145.562  < 2e-16 ***
## Ano_Base             0.034689   0.007316   4.741 2.12e-06 ***
## SEMANA02             0.305737   0.048405   6.316 2.68e-10 ***
## SEMANA03             0.448028   0.045902   9.761  < 2e-16 ***
## SEMANA04             0.515198   0.045126  11.417  < 2e-16 ***
## SEMANA05             0.508763   0.045177  11.262  < 2e-16 ***
## SEMANA06             0.529353   0.045015  11.760  < 2e-16 ***
## SEMANA07             0.630206   0.044260  14.239  < 2e-16 ***
## SEMANA08             0.541229   0.044922  12.048  < 2e-16 ***
## SEMANA09             0.557758   0.044796  12.451  < 2e-16 ***
## SEMANA10             0.619988   0.044333  13.985  < 2e-16 ***
## SEMANA11             0.572673   0.044683  12.816  < 2e-16 ***
## SEMANA12             0.552064   0.046546  11.861  < 2e-16 ***
## SEMANA13             0.546106   0.045546  11.990  < 2e-16 ***
## SEMANA14             0.611089   0.045487  13.434  < 2e-16 ***
## SEMANA15             0.640681   0.045323  14.136  < 2e-16 ***
## SEMANA16             0.504788   0.046348  10.891  < 2e-16 ***
## SEMANA17             0.652152   0.043531  14.981  < 2e-16 ***
## SEMANA18             0.656486   0.043692  15.025  < 2e-16 ***
## SEMANA19             0.606659   0.044655  13.586  < 2e-16 ***
## SEMANA20             0.610962   0.044399  13.761  < 2e-16 ***
## SEMANA21             0.583114   0.044819  13.010  < 2e-16 ***
## SEMANA22             0.531601   0.045705  11.631  < 2e-16 ***
## SEMANA23             0.577994   0.045319  12.754  < 2e-16 ***
## SEMANA24             0.567365   0.044939  12.625  < 2e-16 ***
## SEMANA25             0.509450   0.045861  11.108  < 2e-16 ***
## SEMANA26             0.472169   0.046170  10.227  < 2e-16 ***
## SEMANA27             0.527185   0.047417  11.118  < 2e-16 ***
## SEMANA28             0.552278   0.044837  12.317  < 2e-16 ***
## SEMANA29             0.610870   0.044224  13.813  < 2e-16 ***
## SEMANA30             0.609263   0.044623  13.654  < 2e-16 ***
## SEMANA31             0.631011   0.048329  13.057  < 2e-16 ***
## SEMANA32             0.626940   0.047592  13.173  < 2e-16 ***
## SEMANA33             0.602000   0.044690  13.471  < 2e-16 ***
## SEMANA34             0.522700   0.046451  11.253  < 2e-16 ***
## SEMANA35             0.533560   0.044982  11.862  < 2e-16 ***
## SEMANA36             0.578043   0.044642  12.948  < 2e-16 ***
## SEMANA37             0.619988   0.044333  13.985  < 2e-16 ***
## SEMANA38             0.637170   0.044210  14.412  < 2e-16 ***
## SEMANA39             0.524421   0.045053  11.640  < 2e-16 ***
## SEMANA40             0.647214   0.044138  14.663  < 2e-16 ***
## SEMANA41             0.538447   0.044944  11.980  < 2e-16 ***
## SEMANA42             0.564755   0.047119  11.986  < 2e-16 ***
## SEMANA43             0.581384   0.044617  13.030  < 2e-16 ***
## SEMANA44             0.599235   0.044485  13.471  < 2e-16 ***
## SEMANA45             0.514082   0.047524  10.817  < 2e-16 ***
## SEMANA46             0.628373   0.044961  13.976  < 2e-16 ***
## SEMANA47             0.597291   0.045150  13.229  < 2e-16 ***
## SEMANA48             0.610314   0.044404  13.745  < 2e-16 ***
## SEMANA49             0.669428   0.043325  15.451  < 2e-16 ***
## SEMANA50             0.669221   0.043646  15.333  < 2e-16 ***
## SEMANA51             0.707653   0.043136  16.405  < 2e-16 ***
## SEMANA52             0.396460   0.046321   8.559  < 2e-16 ***
## SEMANA53            -0.496270   0.072955  -6.802 1.03e-11 ***
## Feria_Flores_Semana  0.122084   0.038117   3.203  0.00136 ** 
## Semana_Santa_Semana -0.241999   0.052486  -4.611 4.01e-06 ***
## Feriados_Lunes      -0.083630   0.013185  -6.343 2.25e-10 ***
## Feriados_Otros      -0.081229   0.019322  -4.204 2.62e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 2008.38  on 209  degrees of freedom
## Residual deviance:  513.12  on 152  degrees of freedom
## AIC: 2245.6
## 
## Number of Fisher Scoring iterations: 4
glm_fit_s_sd
## 
## Call:  glm(formula = ACCIDENTES_LEVES ~ Ano_Base + SEMANA + Feria_Flores_Semana + 
##     Semana_Santa_Semana + Feriados_Lunes + Feriados_Otros, family = "poisson", 
##     data = Train_S_Dataset)
## 
## Coefficients:
##         (Intercept)             Ano_Base             SEMANA02  
##             5.32738              0.03469              0.30574  
##            SEMANA03             SEMANA04             SEMANA05  
##             0.44803              0.51520              0.50876  
##            SEMANA06             SEMANA07             SEMANA08  
##             0.52935              0.63021              0.54123  
##            SEMANA09             SEMANA10             SEMANA11  
##             0.55776              0.61999              0.57267  
##            SEMANA12             SEMANA13             SEMANA14  
##             0.55206              0.54611              0.61109  
##            SEMANA15             SEMANA16             SEMANA17  
##             0.64068              0.50479              0.65215  
##            SEMANA18             SEMANA19             SEMANA20  
##             0.65649              0.60666              0.61096  
##            SEMANA21             SEMANA22             SEMANA23  
##             0.58311              0.53160              0.57799  
##            SEMANA24             SEMANA25             SEMANA26  
##             0.56737              0.50945              0.47217  
##            SEMANA27             SEMANA28             SEMANA29  
##             0.52718              0.55228              0.61087  
##            SEMANA30             SEMANA31             SEMANA32  
##             0.60926              0.63101              0.62694  
##            SEMANA33             SEMANA34             SEMANA35  
##             0.60200              0.52270              0.53356  
##            SEMANA36             SEMANA37             SEMANA38  
##             0.57804              0.61999              0.63717  
##            SEMANA39             SEMANA40             SEMANA41  
##             0.52442              0.64721              0.53845  
##            SEMANA42             SEMANA43             SEMANA44  
##             0.56475              0.58138              0.59924  
##            SEMANA45             SEMANA46             SEMANA47  
##             0.51408              0.62837              0.59729  
##            SEMANA48             SEMANA49             SEMANA50  
##             0.61031              0.66943              0.66922  
##            SEMANA51             SEMANA52             SEMANA53  
##             0.70765              0.39646             -0.49627  
## Feria_Flores_Semana  Semana_Santa_Semana       Feriados_Lunes  
##             0.12208             -0.24200             -0.08363  
##      Feriados_Otros  
##            -0.08123  
## 
## Degrees of Freedom: 209 Total (i.e. Null);  152 Residual
## Null Deviance:       2008 
## Residual Deviance: 513.1     AIC: 2246

Calculo MSE y RMSE para los datos de entrenamiento

y_tr_pred_glm_s_sd<-predict(glm_fit_s_sd,Train_S_Dataset[,c("Ano_Base","SEMANA","Feria_Flores_Semana","Semana_Santa_Semana","Feriados_Lunes","Feriados_Otros")],type="response")
mse_tr_glm_s_sd<-mean((Train_S_Dataset$ACCIDENTES_LEVES-y_tr_pred_glm_s_sd)^2) # calcula el mse de entrenamiento
RMSE_tr_glm_s_sd = sqrt(mse_tr_glm_s_sd)
mse_tr_glm_s_sd
## [1] 669.6687
RMSE_tr_glm_s_sd
## [1] 25.87796

Calculo MSE y RMSE para los datos de validación

y_test_pred_glm_s_sd<-predict(glm_fit_s_sd,Test_S_Dataset[,c("Ano_Base","SEMANA","Feria_Flores_Semana","Semana_Santa_Semana","Feriados_Lunes","Feriados_Otros")],type="response")
mse_test_glm_s_sd<-mean((Train_S_Dataset$ACCIDENTES_LEVES-y_test_pred_glm_s_sd)^2) # calcula el mse de entrenamiento
## Warning in Train_S_Dataset$ACCIDENTES_LEVES - y_test_pred_glm_s_sd:
## longitud de objeto mayor no es múltiplo de la longitud de uno menor
RMSE_test_glm_s_sd = sqrt(mse_test_glm_s_sd)
mse_test_glm_s_sd
## [1] 2241.203
RMSE_test_glm_s_sd
## [1] 47.34135

Predicción en la muestra

plot_ly (data=Train_S_Dataset,
         x = ~Ano_Sem,
         y = ~ACCIDENTES_LEVES,
         type = "scatter" ,mode = "lines",
         name='Real',
         line=list(width=1,color='rgb(205, 12, 24)'))%>%
  add_trace(y= ~y_tr_pred_glm_s_sd,
            name='Modelo glm',
            line=list(width=1,color='rgb(22, 96, 167)'))%>%
  layout(title='Total Accidentes leves',
         xaxis=list(title="Fecha"),
         yaxis=list(title="Accidentes leves"),
         legend = list(x = 1, y = 0.4))

Gráfica serie 2018

plot_ly (data=Test_S_Dataset,
         x = ~Ano_Sem,
         y = ~ACCIDENTES_LEVES,
         type = "scatter" ,mode = "lines",
         name='Real',
         line=list(width=1,color='rgb(205, 12, 24)'))%>%
  add_trace(y= ~y_test_pred_glm_s_sd,
            name='Modelo glm',
            line=list(width=1,color='rgb(22, 96, 167)'))%>%
  layout(title='Total Accidentes leves',
         xaxis=list(title="Fecha"),
         yaxis=list(title="Accidentes leves"),
         legend = list(x = 1, y = 0.4))

#### REsumen Modelos Regresión lineal generalizado para los diferentes tipos de accidente

Tipo_de_accidentes= c("Total Accidentes","Accidentes Graves","Accidentes Leves")
RMSE_Train_glm = round(c(RMSE_tr_glm_s,RMSE_tr_glm_s_m,RMSE_tr_glm_s_sd), 3)
RMSE_Test_glm = round(c(RMSE_test_glm_s,RMSE_test_glm_s_m,RMSE_test_glm_s_sd),3)

Tabla_glm = data.frame (cbind(Tipo_de_accidentes,RMSE_Train_glm,RMSE_Test_glm))
Tabla_glm
##   Tipo_de_accidentes RMSE_Train_glm RMSE_Test_glm
## 1   Total Accidentes         44.056        85.567
## 2  Accidentes Graves         29.953        47.756
## 3   Accidentes Leves         25.878        47.341

4. ARBOLES DE REGRESION

Total Accidentes

trcntrl = trainControl(method="cv", number=10)
caret_tree_fit_s = caret::train(TOTAL_ACCIDENTES∼Ano_Base+SEMANA+Feria_Flores_Semana+Semana_Santa_Semana+Feriados_Lunes+Feriados_Otros,data=Train_S_Dataset,
                              method = "rpart", trControl = trcntrl,
                      parms = list(split = "gini"),
                      preProcess=c("center", "scale"),
                      tuneLength = 10)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut
## = 10, : These variables have zero variances: SEMANA53
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info =
## trainInfo, : There were missing values in resampled performance measures.
caret_tree_fit_s
## CART 
## 
## 210 samples
##   6 predictor
## 
## Pre-processing: centered (57), scaled (57) 
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 190, 190, 188, 189, 190, 189, ... 
## Resampling results across tuning parameters:
## 
##   cp          RMSE      Rsquared    MAE     
##   0.00000000  90.03839  0.22597915  60.02933
##   0.01029619  89.86132  0.23564610  60.36643
##   0.02059239  89.40535  0.23917583  60.42981
##   0.03088858  89.40535  0.23917583  60.42981
##   0.04118477  89.40535  0.23917583  60.42981
##   0.05148097  91.10976  0.21309430  61.82791
##   0.06177716  92.78224  0.18066210  62.41575
##   0.07207335  94.84337  0.14490896  64.49534
##   0.08236955  98.03159  0.09107280  68.27761
##   0.09266574  99.56088  0.03930042  70.17119
## 
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was cp = 0.04118477.

Calculo MSE y RMSE para los datos de entrenamiento

y_tr_pred_tree_s<-predict(caret_tree_fit_s,Train_S_Dataset[,c("Ano_Base","SEMANA","Feria_Flores_Semana","Semana_Santa_Semana","Feriados_Lunes","Feriados_Otros")])
mse_tr_tree_s<-mean((Train_S_Dataset$TOTAL_ACCIDENTES-y_tr_pred_tree_s)^2) # calcula el mse de entrenamiento
RMSE_tr_tree_s = sqrt(mse_tr_tree_s)
mse_tr_tree_s
## [1] 8571.789
RMSE_tr_tree_s
## [1] 92.58396

Calculo MSE y RMSE para los datos de validación

  y_test_pred_tree_s<-predict(caret_tree_fit_s,Test_S_Dataset[,c("Ano_Base","SEMANA","Feria_Flores_Semana","Semana_Santa_Semana","Feriados_Lunes","Feriados_Otros")])
mse_test_tree_s<-mean((Test_S_Dataset$TOTAL_ACCIDENTES-y_test_pred_tree_s)^2) # calcula el mse de entrenamiento
RMSE_test_tree_s = sqrt(mse_test_tree_s)
mse_test_tree_s
## [1] 3909.887
RMSE_test_tree_s
## [1] 62.52909

Predicción en la muestra

plot_ly (data=Train_S_Dataset,
         x = ~Ano_Sem,
         y = ~TOTAL_ACCIDENTES,
         type = "scatter" ,mode = "lines",
         name='Real',
         line=list(width=1,color='rgb(205, 12, 24)'))%>%
  add_trace(y= ~y_tr_pred_tree_s,
            name='Modelo tree',
            line=list(width=1,color='rgb(22, 96, 167)'))%>%
  layout(title='Total accidentes',
         xaxis=list(title="Fecha"),
         yaxis=list(title="Accidentes"),
         legend = list(x = 1, y = 0.4))

Gráfica serie 2018

plot_ly (data=Test_S_Dataset,
         x = ~Ano_Sem,
         y = ~TOTAL_ACCIDENTES,
         type = "scatter" ,mode = "lines",
         name='Real',
         line=list(width=1,color='rgb(205, 12, 24)'))%>%
  add_trace(y= ~y_test_pred_tree_s,
            name='Modelo tree',
            line=list(width=1,color='rgb(22, 96, 167)'))%>%
  layout(title='Total accidentes',
         xaxis=list(title="Fecha"),
         yaxis=list(title="Accidentes"),
         legend = list(x = 1, y = 0.4))

Accidentes graves

trcntrl = trainControl(method="cv", number=10)
caret_tree_fit_s_m = caret::train(ACCIDENTES_GRAVES∼Ano_Base+SEMANA+Feria_Flores_Semana+Semana_Santa_Semana+Feriados_Lunes+Feriados_Otros,data=Train_S_Dataset,
                              method = "rpart", trControl = trcntrl,
                      parms = list(split = "gini"),
                      preProcess=c("center", "scale"),
                      tuneLength = 10)
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info =
## trainInfo, : There were missing values in resampled performance measures.
caret_tree_fit_s_m
## CART 
## 
## 210 samples
##   6 predictor
## 
## Pre-processing: centered (57), scaled (57) 
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 189, 190, 188, 190, 189, 190, ... 
## Resampling results across tuning parameters:
## 
##   cp           RMSE      Rsquared    MAE     
##   0.000000000  52.11449  0.10711890  36.62441
##   0.009507413  51.37813  0.11978591  36.27833
##   0.019014826  50.59740  0.14649515  35.84655
##   0.028522239  50.59740  0.14649515  35.84655
##   0.038029652  51.74154  0.09941232  36.80310
##   0.047537065  52.32083  0.09155580  37.29817
##   0.057044478  53.15921  0.06724254  37.87887
##   0.066551891  52.92938  0.07832094  37.79743
##   0.076059304  53.88846  0.03710721  38.80926
##   0.085566717  53.88846  0.03710721  38.80926
## 
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was cp = 0.02852224.

Calculo MSE y RMSE para los datos de entrenamiento

y_tr_pred_tree_s_m<-predict(caret_tree_fit_s_m,Train_S_Dataset[,c("Ano_Base","SEMANA","Feria_Flores_Semana","Semana_Santa_Semana","Feriados_Lunes","Feriados_Otros")])
mse_tr_tree_s_m<-mean((Train_S_Dataset$ACCIDENTES_GRAVES-y_tr_pred_tree_s_m)^2) # calcula el mse de entrenamiento
RMSE_tr_tree_s_m = sqrt(mse_tr_tree_s_m)
mse_tr_tree_s_m
## [1] 2702.14
RMSE_tr_tree_s_m
## [1] 51.98211

Calculo MSE y RMSE para los datos de validación

y_test_pred_tree_s_m<-predict(caret_tree_fit_s_m,Test_S_Dataset[,c("Ano_Base","SEMANA","Feria_Flores_Semana","Semana_Santa_Semana","Feriados_Lunes","Feriados_Otros")])
mse_test_tree_s_m<-mean((Test_S_Dataset$ACCIDENTES_GRAVES-y_test_pred_tree_s_m)^2) # calcula el mse de entrenamiento
RMSE_test_tree_s_m = sqrt(mse_test_tree_s_m)
mse_test_tree_s_m
## [1] 2493.983
RMSE_test_tree_s_m
## [1] 49.93979

Predicción en la muestra

plot_ly (data=Train_S_Dataset,
         x = ~Ano_Sem,
         y = ~ACCIDENTES_GRAVES,
         type = "scatter" ,mode = "lines",
         name='Real',
         line=list(width=1,color='rgb(205, 12, 24)'))%>%
  add_trace(y= ~y_tr_pred_tree_s_m,
            name='Modelo tree',
            line=list(width=1,color='rgb(22, 96, 167)'))%>%
  layout(title='Accidentes graves',
         xaxis=list(title="Fecha"),
         yaxis=list(title="Accidentes"),
         legend = list(x = 1, y = 0.4))

Gráfica serie 2018

plot_ly (data=Test_S_Dataset,
         x = ~Ano_Sem,
         y = ~ACCIDENTES_GRAVES,
         type = "scatter" ,mode = "lines",
         name='Real',
         line=list(width=1,color='rgb(205, 12, 24)'))%>%
  add_trace(y= ~y_test_pred_tree_s_m,
            name='Modelo tree',
            line=list(width=1,color='rgb(22, 96, 167)'))%>%
  layout(title='Accidentes graves',
         xaxis=list(title="Fecha"),
         yaxis=list(title="Accidentes"),
         legend = list(x = 1, y = 0.4))

Accidentes leves

trcntrl = trainControl(method="cv", number=10)
caret_tree_fit_s_sd = caret::train(ACCIDENTES_LEVES∼Ano_Base+SEMANA+Feria_Flores_Semana+Semana_Santa_Semana+Feriados_Lunes+Feriados_Otros,data=Train_S_Dataset,
                              method = "rpart", trControl = trcntrl,
                      parms = list(split = "gini"),
                      preProcess=c("center", "scale"),
                      tuneLength = 10)
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info =
## trainInfo, : There were missing values in resampled performance measures.
caret_tree_fit_s_sd
## CART 
## 
## 210 samples
##   6 predictor
## 
## Pre-processing: centered (57), scaled (57) 
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 188, 190, 190, 188, 188, 190, ... 
## Resampling results across tuning parameters:
## 
##   cp           RMSE      Rsquared    MAE     
##   0.000000000  48.46375  0.15751144  34.53645
##   0.008958719  48.66304  0.14614145  34.69539
##   0.017917439  48.38173  0.15074617  34.34427
##   0.026876158  48.38173  0.15074617  34.34427
##   0.035834877  48.38173  0.15074617  34.34427
##   0.044793597  49.01975  0.12905765  34.52015
##   0.053752316  49.01975  0.12905765  34.52015
##   0.062711035  49.46213  0.12006180  34.77398
##   0.071669755  51.71397  0.03995370  36.56575
##   0.080628474  52.35869  0.02865765  37.31018
## 
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was cp = 0.03583488.

Calculo MSE y RMSE para los datos de entrenamiento

y_tr_pred_tree_s_sd<-predict(caret_tree_fit_s_sd,Train_S_Dataset[,c("Ano_Base","SEMANA","Feria_Flores_Semana","Semana_Santa_Semana","Feriados_Lunes","Feriados_Otros")])
mse_tr_tree_s_sd<-mean((Train_S_Dataset$ACCIDENTES_LEVES-y_tr_pred_tree_s_sd)^2) # calcula el mse de entrenamiento
RMSE_tr_tree_s_sd = sqrt(mse_tr_tree_s_sd)
mse_tr_tree_s_sd
## [1] 2471.872
RMSE_tr_tree_s_sd
## [1] 49.71793

Calculo MSE y RMSE para los datos de validación

  y_test_pred_tree_s_sd<-predict(caret_tree_fit_s_sd,Test_S_Dataset[,c("Ano_Base","SEMANA","Feria_Flores_Semana","Semana_Santa_Semana","Feriados_Lunes","Feriados_Otros")])
mse_test_tree_s_sd<-mean((Test_S_Dataset$ACCIDENTES_LEVES-y_test_pred_tree_s_sd)^2) # calcula el mse de entrenamiento
RMSE_test_tree_s_sd = sqrt(mse_test_tree_s_sd)
mse_test_tree_s_sd
## [1] 1766.186
RMSE_test_tree_s_sd
## [1] 42.02602

Predicción en la muestra

plot_ly (data=Train_S_Dataset,
         x = ~Ano_Sem,
         y = ~ACCIDENTES_LEVES,
         type = "scatter" ,mode = "lines",
         name='Real',
         line=list(width=1,color='rgb(205, 12, 24)'))%>%
  add_trace(y= ~y_tr_pred_tree_s_sd,
            name='Modelo tree',
            line=list(width=1,color='rgb(22, 96, 167)'))%>%
  layout(title='Total accidentes leves',
         xaxis=list(title="Fecha"),
         yaxis=list(title="Accidentes leves"),
         legend = list(x = 1, y = 0.4))

Gráfica serie 2018

plot_ly (data=Test_S_Dataset,
         x = ~Ano_Sem,
         y = ~ACCIDENTES_LEVES,
         type = "scatter" ,mode = "lines",
         name='Real',
         line=list(width=1,color='rgb(205, 12, 24)'))%>%
  add_trace(y= ~y_test_pred_tree_s_sd,
            name='Modelo tree',
            line=list(width=1,color='rgb(22, 96, 167)'))%>%
  layout(title='Total accidentes leves',
         xaxis=list(title="Fecha"),
         yaxis=list(title="Accidentes leves"),
         legend = list(x = 1, y = 0.4))

Resumen Modelos Árboles de regresión para los diferentes tipos de accidente

Tipo_de_accidentes= c("Total Accidentes","Accidentes Graves","Accidentes Leves")
RMSE_Train_tree = round(c(RMSE_tr_tree_s,RMSE_tr_tree_s_m,RMSE_tr_tree_s_sd), 3)
RMSE_Test_tree = round(c(RMSE_test_tree_s,RMSE_test_tree_s_m,RMSE_test_tree_s_sd),3)

Tabla_tree = data.frame (cbind(Tipo_de_accidentes,RMSE_Train_tree,RMSE_Test_tree))
Tabla_tree
##   Tipo_de_accidentes RMSE_Train_tree RMSE_Test_tree
## 1   Total Accidentes          92.584         62.529
## 2  Accidentes Graves          51.982          49.94
## 3   Accidentes Leves          49.718         42.026

5. BOSQUE ALEATORIO

Total Accidentes

trcntrl = trainControl(method="cv", number=10)
caret_rf_fit_s = caret::train(TOTAL_ACCIDENTES∼Ano_Base+SEMANA+Feria_Flores_Semana+Semana_Santa_Semana+Feriados_Lunes+Feriados_Otros, data=Train_S_Dataset,
                      method = "rf", trControl = trcntrl,
                      prox=TRUE,allowParallel=TRUE)
summary(caret_rf_fit_s)
##                 Length Class      Mode     
## call                6  -none-     call     
## type                1  -none-     character
## predicted         210  -none-     numeric  
## mse               500  -none-     numeric  
## rsq               500  -none-     numeric  
## oob.times         210  -none-     numeric  
## importance         57  -none-     numeric  
## importanceSD        0  -none-     NULL     
## localImportance     0  -none-     NULL     
## proximity       44100  -none-     numeric  
## ntree               1  -none-     numeric  
## mtry                1  -none-     numeric  
## forest             11  -none-     list     
## coefs               0  -none-     NULL     
## y                 210  -none-     numeric  
## test                0  -none-     NULL     
## inbag               0  -none-     NULL     
## xNames             57  -none-     character
## problemType         1  -none-     character
## tuneValue           1  data.frame list     
## obsLevels           1  -none-     logical  
## param               2  -none-     list
caret_rf_fit_s
## Random Forest 
## 
## 210 samples
##   6 predictor
## 
## No pre-processing
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 188, 187, 189, 190, 190, 190, ... 
## Resampling results across tuning parameters:
## 
##   mtry  RMSE      Rsquared   MAE     
##    2    78.31259  0.5147516  55.17002
##   29    80.39827  0.3457506  60.11523
##   57    82.26926  0.3282961  62.07319
## 
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was mtry = 2.
plot(caret_rf_fit_s)

Calculo MSE y RMSE para los datos de entrenamiento

y_tr_pred_rf_s<-predict(caret_rf_fit_s,Train_S_Dataset[,c("Ano_Base","SEMANA","Feria_Flores_Semana","Semana_Santa_Semana","Feriados_Lunes","Feriados_Otros")])
mse_tr_rf_s<-mean((Train_S_Dataset$TOTAL_ACCIDENTES-y_tr_pred_rf_s)^2) # calcula el mse de entrenamiento
RMSE_tr_rf_s = sqrt(mse_tr_rf_s)
mse_tr_rf_s
## [1] 5745.099
RMSE_tr_rf_s
## [1] 75.79643

Calculo MSE y RMSE para los datos de validación

y_test_pred_rf_s<-predict(caret_rf_fit_s,Test_S_Dataset[,c("Ano_Base","SEMANA","Feria_Flores_Semana","Semana_Santa_Semana","Feriados_Lunes","Feriados_Otros")])
mse_test_rf_s<-mean((Test_S_Dataset$TOTAL_ACCIDENTES-y_test_pred_rf_s)^2) # calcula el mse de entrenamiento
RMSE_test_rf_s = sqrt(mse_test_rf_s)
mse_test_rf_s
## [1] 3400.533
RMSE_test_rf_s
## [1] 58.31409

Predicción en la muestra

plot_ly (data=Train_S_Dataset,
         x = ~Ano_Sem,
         y = ~TOTAL_ACCIDENTES,
         type = "scatter" ,mode = "lines",
         name='Real',
         line=list(width=1,color='rgb(205, 12, 24)'))%>%
  add_trace(y= ~y_tr_pred_rf_s,
            name='Modelo rf',
            line=list(width=1,color='rgb(22, 96, 167)'))%>%
  layout(title='Total accidentes',
         xaxis=list(title="Fecha"),
         yaxis=list(title="Accidentes"),
         legend = list(x = 1, y = 0.4))

Gráfica serie 2018

plot_ly (data=Test_S_Dataset,
         x = ~Ano_Sem,
         y = ~TOTAL_ACCIDENTES,
         type = "scatter" ,mode = "lines",
         name='Real',
         line=list(width=1,color='rgb(205, 12, 24)'))%>%
  add_trace(y=  ~y_test_pred_rf_s,
            name='Modelo rf',
            line=list(width=1,color='rgb(22, 96, 167)'))%>%
  layout(title='Total accidentes',
         xaxis=list(title="Fecha"),
         yaxis=list(title="Accidentes"),
         legend = list(x = 1, y = 0.4))

Accidentes Graves

trcntrl = trainControl(method="cv", number=10)
caret_rf_fit_s_m = caret::train(ACCIDENTES_GRAVES∼Ano_Base+SEMANA+Feria_Flores_Semana+Semana_Santa_Semana+Feriados_Lunes+Feriados_Otros, data=Train_S_Dataset,
                      method = "rf", trControl = trcntrl,
                      prox=TRUE,allowParallel=TRUE)
summary(caret_rf_fit_s_m)
##                 Length Class      Mode     
## call                6  -none-     call     
## type                1  -none-     character
## predicted         210  -none-     numeric  
## mse               500  -none-     numeric  
## rsq               500  -none-     numeric  
## oob.times         210  -none-     numeric  
## importance         57  -none-     numeric  
## importanceSD        0  -none-     NULL     
## localImportance     0  -none-     NULL     
## proximity       44100  -none-     numeric  
## ntree               1  -none-     numeric  
## mtry                1  -none-     numeric  
## forest             11  -none-     list     
## coefs               0  -none-     NULL     
## y                 210  -none-     numeric  
## test                0  -none-     NULL     
## inbag               0  -none-     NULL     
## xNames             57  -none-     character
## problemType         1  -none-     character
## tuneValue           1  data.frame list     
## obsLevels           1  -none-     logical  
## param               2  -none-     list
caret_rf_fit_s_m
## Random Forest 
## 
## 210 samples
##   6 predictor
## 
## No pre-processing
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 188, 189, 189, 188, 190, 189, ... 
## Resampling results across tuning parameters:
## 
##   mtry  RMSE      Rsquared   MAE     
##    2    47.53115  0.3604442  34.49706
##   29    46.16836  0.2951952  35.33219
##   57    46.97492  0.2832970  36.04291
## 
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was mtry = 29.
plot(caret_rf_fit_s_m)

Calculo MSE y RMSE para los datos de entrenamiento

y_tr_pred_rf_s_m<-predict(caret_rf_fit_s_m,Train_S_Dataset[,c("Ano_Base","SEMANA","Feria_Flores_Semana","Semana_Santa_Semana","Feriados_Lunes","Feriados_Otros")])
mse_tr_rf_s_m<-mean((Train_S_Dataset$ACCIDENTES_GRAVES-y_tr_pred_rf_s_m)^2) # calcula el mse de entrenamiento
RMSE_tr_rf_s_m = sqrt(mse_tr_rf_s_m)
mse_tr_rf_s_m
## [1] 849.2143
RMSE_tr_rf_s_m
## [1] 29.14128

Calculo MSE y RMSE para los datos de validación

y_test_pred_rf_s_m<-predict(caret_rf_fit_s_m,Test_S_Dataset[,c("Ano_Base","SEMANA","Feria_Flores_Semana","Semana_Santa_Semana","Feriados_Lunes","Feriados_Otros")])
mse_test_rf_s_m<-mean((Test_S_Dataset$ACCIDENTES_GRAVES-y_test_pred_rf_s_m)^2) # calcula el mse de entrenamiento
RMSE_test_rf_s_m = sqrt(mse_test_rf_s_m)
mse_test_rf_s_m
## [1] 2512.328
RMSE_test_rf_s_m
## [1] 50.12313

Predicción en la muestra

plot_ly (data=Train_S_Dataset,
         x = ~Ano_Sem,
         y = ~ACCIDENTES_GRAVES,
         type = "scatter" ,mode = "lines",
         name='Real',
         line=list(width=1,color='rgb(205, 12, 24)'))%>%
  add_trace(y= ~y_tr_pred_rf_s_m,
            name='Modelo rf',
            line=list(width=1,color='rgb(22, 96, 167)'))%>%
  layout(title='Total accidentes graves',
         xaxis=list(title="Fecha"),
         yaxis=list(title="Accidentes graves"),
         legend = list(x = 1, y = 0.4))

Gráfica serie 2018

plot_ly (data=Test_S_Dataset,
         x = ~Ano_Sem,
         y = ~ACCIDENTES_GRAVES,
         type = "scatter" ,mode = "lines",
         name='Real',
         line=list(width=1,color='rgb(205, 12, 24)'))%>%
  add_trace(y= ~y_test_pred_rf_s_m,
            name='Modelo rf',
            line=list(width=1,color='rgb(22, 96, 167)'))%>%
  layout(title='Total accidentes graves',
         xaxis=list(title="Fecha"),
         yaxis=list(title="Accidentes graves"),
         legend = list(x = 1, y = 0.4))

Accidentes Leves

trcntrl = trainControl(method="cv", number=10)
caret_rf_fit_s_sd = caret::train(ACCIDENTES_LEVES∼Ano_Base+SEMANA+Feria_Flores_Semana+Semana_Santa_Semana+Feriados_Lunes+Feriados_Otros, data=Train_S_Dataset,
                      method = "rf", trControl = trcntrl,
                      prox=TRUE,allowParallel=TRUE)
summary(caret_rf_fit_s_sd)
##                 Length Class      Mode     
## call                6  -none-     call     
## type                1  -none-     character
## predicted         210  -none-     numeric  
## mse               500  -none-     numeric  
## rsq               500  -none-     numeric  
## oob.times         210  -none-     numeric  
## importance         57  -none-     numeric  
## importanceSD        0  -none-     NULL     
## localImportance     0  -none-     NULL     
## proximity       44100  -none-     numeric  
## ntree               1  -none-     numeric  
## mtry                1  -none-     numeric  
## forest             11  -none-     list     
## coefs               0  -none-     NULL     
## y                 210  -none-     numeric  
## test                0  -none-     NULL     
## inbag               0  -none-     NULL     
## xNames             57  -none-     character
## problemType         1  -none-     character
## tuneValue           1  data.frame list     
## obsLevels           1  -none-     logical  
## param               2  -none-     list
caret_rf_fit_s_sd
## Random Forest 
## 
## 210 samples
##   6 predictor
## 
## No pre-processing
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 190, 189, 189, 188, 188, 190, ... 
## Resampling results across tuning parameters:
## 
##   mtry  RMSE      Rsquared   MAE     
##    2    44.56759  0.4451170  31.98620
##   29    47.05826  0.2588825  35.01703
##   57    48.14412  0.2501655  35.95330
## 
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was mtry = 2.
plot(caret_rf_fit_s_sd)

Calculo MSE y RMSE para los datos de entrenamiento

y_tr_pred_rf_s_sd<-predict(caret_rf_fit_s_sd,Train_S_Dataset[,c("Ano_Base","SEMANA","Feria_Flores_Semana","Semana_Santa_Semana","Feriados_Lunes","Feriados_Otros")])
mse_tr_rf_s_sd<-mean((Train_S_Dataset$ACCIDENTES_LEVES-y_tr_pred_rf_s_sd)^2) # calcula el mse de entrenamiento
RMSE_tr_rf_s_sd = sqrt(mse_tr_rf_s_sd)
mse_tr_rf_s_sd
## [1] 1764.784
RMSE_tr_rf_s_sd
## [1] 42.00934

Calculo MSE y RMSE para los datos de validación

y_test_pred_rf_s_sd<-predict(caret_rf_fit_s_sd,Test_S_Dataset[,c("Ano_Base","SEMANA","Feria_Flores_Semana","Semana_Santa_Semana","Feriados_Lunes","Feriados_Otros")])
mse_test_rf_s_sd<-mean((Test_S_Dataset$ACCIDENTES_LEVES-y_test_pred_rf_s_sd)^2) # calcula el mse de entrenamiento
RMSE_test_rf_s_sd = sqrt(mse_test_rf_s_sd)
mse_test_rf_s_sd
## [1] 1392.295
RMSE_test_rf_s_sd
## [1] 37.31347

Predicción en la muestra

plot_ly (data=Train_S_Dataset,
         x = ~Ano_Sem,
         y = ~ACCIDENTES_LEVES,
         type = "scatter" ,mode = "lines",
         name='Real',
         line=list(width=1,color='rgb(205, 12, 24)'))%>%
  add_trace(y= ~y_tr_pred_rf_s_sd,
            name='Modelo rf',
            line=list(width=1,color='rgb(22, 96, 167)'))%>%
  layout(title='Total accidentes leves',
         xaxis=list(title="Fecha"),
         yaxis=list(title="Accidentes leves"),
         legend = list(x = 1, y = 0.4))

Gráfica serie 2018

plot_ly (data=Test_S_Dataset,
         x = ~Ano_Sem,
         y = ~ACCIDENTES_LEVES,
         type = "scatter" ,mode = "lines",
         name='Real',
         line=list(width=1,color='rgb(205, 12, 24)'))%>%
  add_trace(y= ~y_test_pred_rf_s_sd,
            name='Modelo rf',
            line=list(width=1,color='rgb(22, 96, 167)'))%>%
  layout(title='Total accidentes leves',
         xaxis=list(title="Fecha"),
         yaxis=list(title="Accidentes leves"),
         legend = list(x = 1, y = 0.4))

Resumen Modelos Random Forest para los diferentes tipos de accidente

Tipo_de_accidentes= c("Total Accidentes","Total Graves","Total Leves")
RMSE_Train_rf = round(c(RMSE_tr_rf_s,RMSE_tr_rf_s_m,RMSE_tr_rf_s_sd), 3)
RMSE_Test_rf = round(c(RMSE_test_rf_s,RMSE_test_rf_s_m,RMSE_test_rf_s_sd),3)

Tabla_rf = data.frame (cbind(Tipo_de_accidentes,RMSE_Train_rf,RMSE_Test_rf))
Tabla_rf
##   Tipo_de_accidentes RMSE_Train_rf RMSE_Test_rf
## 1   Total Accidentes        75.796       58.314
## 2       Total Graves        29.141       50.123
## 3        Total Leves        42.009       37.313

ELECCION DEL MODELO

1. Elección del modelo para el total de accidentes

Comparación en el entrenamiento

comparacion_tr<-data.frame(Ano_Sem=Total_Dataset_Freq_S$Ano_Sem[Total_Dataset_Freq_S$Ano_Sem<="2017_52"],
                          ACCIDENTES=Total_Dataset_Freq_S$TOTAL_ACCIDENTES[Total_Dataset_Freq_S$Ano_Sem<="2017_52"],
                           lm= y_tr_pred_lm_s, 
                           knn= y_tr_pred_knn_s, 
                           glm=y_tr_pred_glm_s ,
                           arbol=y_tr_pred_tree_s,
                           rf=y_tr_pred_rf_s)
plot_ly (data=comparacion_tr,
         x = ~Ano_Sem,
         y = ~ACCIDENTES,
         type = "scatter" ,mode = "lines",
         name='Real',
         line=list(width=1,color='rgb(205, 12, 24)'))%>%
  add_trace(y= ~lm,
            name='lm',
            line=list(width=1,color= "blue"))%>%
  add_trace(y= ~knn,
            name='knn',
            line=list(width=1,color="red"))%>%
  add_trace(y= ~glm,
            name='Modelo Poisson',
            line=list(width=1,color='rgb(22, 96, 167)'))%>%
  add_trace(y= ~arbol,
            name='Árbol',
            line=list(width=1,color="green"))%>%
    add_trace(y= ~rf,
            name='Bosque',
            line=list(width=1,color='rgb(255, 51, 153)'))%>%
  layout(title='Total Accidentes (Entrenamiento)',
         xaxis=list(title="Fecha"),
         yaxis=list(title="Unidades"),
         legend = list(x = 1, y = 0.9))

Comparación en la validación

comparacion_vl<-data.frame(Ano_Sem=Test_S_Dataset$Ano_Sem,
                          ACCIDENTES=Test_S_Dataset$TOTAL_ACCIDENTES,
                           lm= y_test_pred_lm_s, 
                           knn= y_test_pred_knn_s, 
                           glm=y_test_pred_glm_s ,
                           arbol=y_test_pred_tree_s,
                           rf=y_test_pred_rf_s)
plot_ly (data=comparacion_vl,
         x = ~Ano_Sem,
         y = ~ACCIDENTES,
         type = "scatter" ,mode = "lines",
         name='Real',
         line=list(width=1,color='rgb(205, 12, 24)'))%>%
  add_trace(y= ~lm,
            name='lm',
            line=list(width=1,color= "blue"))%>%
  add_trace(y= ~knn,
            name='knn',
            line=list(width=1,color="red"))%>%
  add_trace(y= ~glm,
            name='Modelo Poisson',
            line=list(width=1,color='rgb(22, 96, 167)'))%>%
  add_trace(y= ~arbol,
            name='Árbol',
            line=list(width=1,color="green"))%>%
  add_trace(y= ~rf,
            name='Bosque',
            line=list(width=1,color='rgb(255, 51, 153)'))%>%
  layout(title='Total Accidentes (Validación)',
         xaxis=list(title="Fecha"),
         yaxis=list(title="Accidentes"),
         legend = list(x = 1, y = 0.9))

Comparación con los RMSE:

Entrenamiento<-round(c(RMSE_tr_lm_s,RMSE_tr_knn_s,RMSE_tr_glm_s,RMSE_tr_tree_s,RMSE_tr_rf_s),3) 
Validacion<-round(c(RMSE_test_lm_s,RMSE_test_knn_s,RMSE_test_glm_s,RMSE_test_tree_s,RMSE_test_rf_s),3) 
nombres<-c("lm","knn","glm","árbol","bosque")
ResultadosRMSE<-data.frame(Entrenamiento=Entrenamiento,Validacion=Validacion)
rownames(ResultadosRMSE)<-nombres

Cálculo de la variación

ResultadosRMSE$Por_variacion<-((ResultadosRMSE$Validacion-ResultadosRMSE$Entrenamiento)/ResultadosRMSE$Entrenamiento)*100
ResultadosRMSE
##        Entrenamiento Validacion Por_variacion
## lm            44.036     63.668      44.58171
## knn           87.688     53.026     -39.52878
## glm           44.056     85.567      94.22326
## árbol         92.584     62.529     -32.46241
## bosque        75.796     58.314     -23.06454

2. Elección del modelo para Accidentes graves

Comparación en el entrenamiento

comparacion_tr<-data.frame(Ano_Sem=Total_Dataset_Freq_S$Ano_Sem[Total_Dataset_Freq_S$Ano_Sem<="2017_52"],
                          ACCIDENTESG=Total_Dataset_Freq_S$ACCIDENTES_GRAVES[Total_Dataset_Freq_S$Ano_Sem<="2017_52"],
                           lm= y_tr_pred_lm_s_m, 
                           knn= y_tr_pred_knn_s_m, 
                           glm=y_tr_pred_glm_s_m,
                           arbol=y_tr_pred_tree_s_m,
                           rf=y_tr_pred_rf_s_m)
plot_ly (data=comparacion_tr,
         x = ~Ano_Sem,
         y = ~ACCIDENTESG,
         type = "scatter" ,mode = "lines",
         name='Real',
         line=list(width=1,color='rgb(205, 12, 24)'))%>%
  add_trace(y= ~lm,
            name='lm-',
            line=list(width=1,color= "blue"))%>%
  add_trace(y= ~knn,
            name='knn-',
            line=list(width=1,color="red"))%>%
  add_trace(y= ~glm,
            name='Modelo Poisson',
            line=list(width=1,color='rgb(22, 96, 167)'))%>%
  add_trace(y= ~arbol,
            name='Árbol',
            line=list(width=1,color="green"))%>%
    add_trace(y= ~rf,
            name='Bosque',
            line=list(width=1,color='black'))%>%
  layout(title='Accidentes graves (Entrenamiento)',
         xaxis=list(title="Fecha"),
         yaxis=list(title="Accidentes"),
         legend = list(x = 1, y = 0.9))

Comparación en la validación

comparacion_vl<-data.frame(Ano_Sem=Test_S_Dataset$Ano_Sem,
                          ACCIDENTESG=Test_S_Dataset$ACCIDENTES_GRAVES,
                           lm= y_test_pred_lm_s_m, 
                           knn= y_test_pred_knn_s_m, 
                           glm=y_test_pred_glm_s_m,
                           arbol=y_test_pred_tree_s_m,
                           rf=y_test_pred_rf_s_m)
plot_ly (data=comparacion_vl,
         x = ~Ano_Sem,
         y = ~ACCIDENTESG,
         type = "scatter" ,mode = "lines",
         name='Real',
         line=list(width=1,color='rgb(205, 12, 24)'))%>%
  add_trace(y= ~lm,
            name='lm',
            line=list(width=1,color= "blue"))%>%
  add_trace(y= ~knn,
            name='knn',
            line=list(width=1,color="red"))%>%
  add_trace(y= ~glm,
            name='Modelo Poisson',
            line=list(width=1,color='rgb(22, 96, 167)'))%>%
  add_trace(y= ~arbol,
            name='Árbol',
            line=list(width=1,color="green"))%>%
  add_trace(y= ~rf,
            name='Bosque',
            line=list(width=1,color='rgb(255, 51, 153)'))%>%
  layout(title='Accidentes graves (Validación)',
         xaxis=list(title="Fecha"),
         yaxis=list(title="Accidentes"),
         legend = list(x = 1, y = 0.9))

Comparación con los RMSE:

Entrenamiento<-round(c(RMSE_tr_lm_s_m,RMSE_tr_knn_s_m,RMSE_tr_glm_s_m,RMSE_tr_tree_s_m,RMSE_tr_rf_s_m),3) 
Validacion<-round(c(RMSE_test_lm_s_m,RMSE_test_knn_s_m,RMSE_test_glm_s_m,RMSE_test_tree_s_m,RMSE_test_rf_s_m),3) 
nombres<-c("lm","knn","glm","árbol","bosque")
ResultadosRMSE<-data.frame(Entrenamiento=Entrenamiento,Validacion=Validacion)
rownames(ResultadosRMSE)<-nombres

Cálculo de la variación

ResultadosRMSE$Variacion<-((ResultadosRMSE$Validacion-ResultadosRMSE$Entrenamiento)/ResultadosRMSE$Entrenamiento)*100
ResultadosRMSE
##        Entrenamiento Validacion  Variacion
## lm            30.000     55.998  86.660000
## knn           50.479     44.620 -11.606807
## glm           29.953     47.756  59.436450
## árbol         51.982     49.940  -3.928283
## bosque        29.141     50.123  72.001647

3. Elección del modelo para Accidentes leves

Comparación en el entrenamiento

comparacion_tr<-data.frame(Ano_Sem=Total_Dataset_Freq_S$Ano_Sem[Total_Dataset_Freq_S$Ano_Sem<="2017_52"],
                          ACCIDENTESL=Total_Dataset_Freq_S$ACCIDENTES_LEVES[Total_Dataset_Freq_S$Ano_Sem<="2017_52"],
                           lm= y_tr_pred_lm_s_sd, 
                           knn= y_tr_pred_knn_s_sd, 
                           glm=y_tr_pred_glm_s_sd,
                           arbol=y_tr_pred_tree_s_sd,
                           rf=y_tr_pred_rf_s_sd)
plot_ly (data=comparacion_tr,
         x = ~Ano_Sem,
         y = ~ACCIDENTESL,
         type = "scatter" ,mode = "lines",
         name='Real',
         line=list(width=1,color='rgb(205, 12, 24)'))%>%
  add_trace(y= ~lm,
            name='lm',
            line=list(width=1,color= "blue"))%>%
  add_trace(y= ~knn,
            name='knn',
            line=list(width=1,color="red"))%>%
  add_trace(y= ~glm,
            name='Modelo Poisson',
            line=list(width=1,color='rgb(22, 96, 167)'))%>%
  add_trace(y= ~arbol,
            name='Árbo',
            line=list(width=1,color="green"))%>%
    add_trace(y= ~rf,
            name='Bosque',
            line=list(width=1,color='rgb(255, 51, 153)'))%>%
  layout(title='Accidentes leves (Entrenamiento)',
         xaxis=list(title="Fecha"),
         yaxis=list(title="Accidentes"),
         legend = list(x = 1, y = 0.9))

Comparación en la validación

comparacion_vl<-data.frame(Ano_Sem=Test_S_Dataset$Ano_Sem,
                          ACCIDENTESL=Test_S_Dataset$ACCIDENTES_GRAVES,
                           lm= y_test_pred_lm_s_sd, 
                           knn= y_test_pred_knn_s_sd, 
                           glm=y_test_pred_glm_s_sd,
                           arbol=y_test_pred_tree_s_sd,
                           rf=y_test_pred_rf_s_sd)
plot_ly (data=comparacion_vl,
         x = ~Ano_Sem,
         y = ~ACCIDENTESL,
         type = "scatter" ,mode = "lines",
         name='Real',
         line=list(width=1,color='rgb(205, 12, 24)'))%>%
  add_trace(y= ~lm,
            name='lm',
            line=list(width=1,color= "blue"))%>%
  add_trace(y= ~knn,
            name='knn',
            line=list(width=1,color="red"))%>%
  add_trace(y= ~glm,
            name='Modelo Poisson',
            line=list(width=1,color='rgb(22, 96, 167)'))%>%
  add_trace(y= ~arbol,
            name='Árbol',
            line=list(width=1,color="green"))%>%
  add_trace(y= ~rf,
            name='Bosque',
            line=list(width=1,color='rgb(255, 51, 153)'))%>%
  layout(title='Accidentes leves (Validación)',
         xaxis=list(title="Fecha"),
         yaxis=list(title="Accidentes"),
         legend = list(x = 1, y = 0.9))

Comparación con los RMSE:

Entrenamiento<-round(c(RMSE_tr_lm_s_sd,RMSE_tr_knn_s_sd,RMSE_tr_glm_s_sd,RMSE_tr_tree_s_sd,RMSE_tr_rf_s_sd),3) 
Validacion<-round(c(RMSE_test_lm_s_sd,RMSE_test_knn_s_sd,RMSE_test_glm_s_sd,RMSE_test_tree_s_sd,RMSE_test_rf_s_sd),3)
nombres<-c("lm","knn","glm","árbol","bosque")
ResultadosRMSE<-data.frame(Entrenamiento=Entrenamiento,Validacion=Validacion)
rownames(ResultadosRMSE)<-nombres

Cálculo de la variación

ResultadosRMSE$Variacion<-((ResultadosRMSE$Validacion-ResultadosRMSE$Entrenamiento)/ResultadosRMSE$Entrenamiento)*100
ResultadosRMSE
##        Entrenamiento Validacion Variacion
## lm            25.765     33.319  29.31884
## knn           50.141     41.293 -17.64624
## glm           25.878     47.341  82.93918
## árbol         49.718     42.026 -15.47126
## bosque        42.009     37.313 -11.17856

Modelos elegidos

Teniendo como criterio el mínimo RMSE en la muestra de validación se eligen los siguientes modelos:

Modelo de regresión lineal para predición de Total Accidentes

Se ajusta el modelo con todos los datos desde el 01-01-2014 al 31-12-2018

library(caret)
trcntrl = trainControl(method="cv", number=10)
caret_lm_fit_s_final = caret::train(TOTAL_ACCIDENTES∼Ano_Base+Ano_Base+SEMANA+Feria_Flores_Semana+Semana_Santa_Semana+Feriados_Lunes+Feriados_Otros, data=Total_Dataset_Freq_S,
                      method = "lm", trControl = trcntrl,
                      preProcess=c("center", "scale"),
                      tuneLength = 10)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut
## = 10, : These variables have zero variances: SEMANA53
## Warning in predict.lm(modelFit, newdata): prediction from a rank-deficient
## fit may be misleading
summary(caret_lm_fit_s_final)
## 
## Call:
## lm(formula = .outcome ~ ., data = dat)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -258.149  -30.832    2.753   32.259  161.851 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          799.336      3.242 246.566  < 2e-16 ***
## Ano_Base               1.399      3.250   0.431 0.667276    
## SEMANA02               8.473      4.731   1.791 0.074808 .  
## SEMANA03              26.585      4.653   5.713 3.88e-08 ***
## SEMANA04              25.603      4.669   5.483 1.23e-07 ***
## SEMANA05              29.168      4.669   6.247 2.40e-09 ***
## SEMANA06              33.773      4.669   7.233 9.34e-12 ***
## SEMANA07              38.736      4.669   8.296 1.45e-14 ***
## SEMANA08              34.267      4.669   7.339 5.01e-12 ***
## SEMANA09              33.061      4.669   7.081 2.27e-11 ***
## SEMANA10              38.352      4.669   8.214 2.43e-14 ***
## SEMANA11              37.831      4.669   8.102 4.88e-14 ***
## SEMANA12              33.825      4.739   7.137 1.64e-11 ***
## SEMANA13              34.379      4.713   7.294 6.53e-12 ***
## SEMANA14              38.750      4.724   8.202 2.62e-14 ***
## SEMANA15              36.913      4.724   7.813 2.89e-13 ***
## SEMANA16              31.073      4.724   6.577 3.95e-10 ***
## SEMANA17              39.069      4.589   8.513 3.67e-15 ***
## SEMANA18              37.867      4.576   8.276 1.65e-14 ***
## SEMANA19              37.442      4.653   8.047 6.88e-14 ***
## SEMANA20              36.290      4.653   7.799 3.15e-13 ***
## SEMANA21              36.674      4.653   7.882 1.90e-13 ***
## SEMANA22              31.487      4.658   6.760 1.42e-10 ***
## SEMANA23              36.087      4.684   7.704 5.64e-13 ***
## SEMANA24              34.475      4.658   7.401 3.47e-12 ***
## SEMANA25              29.156      4.658   6.259 2.24e-09 ***
## SEMANA26              21.754      4.658   4.670 5.45e-06 ***
## SEMANA27              28.454      4.799   5.929 1.28e-08 ***
## SEMANA28              32.430      4.669   6.946 4.95e-11 ***
## SEMANA29              38.727      4.658   8.314 1.30e-14 ***
## SEMANA30              37.140      4.653   7.982 1.03e-13 ***
## SEMANA31              40.645      5.069   8.019 8.20e-14 ***
## SEMANA32              35.309      5.372   6.572 4.06e-10 ***
## SEMANA33              38.593      4.653   8.294 1.47e-14 ***
## SEMANA34              32.791      4.731   6.931 5.39e-11 ***
## SEMANA35              36.625      4.669   7.844 2.40e-13 ***
## SEMANA36              35.473      4.669   7.597 1.07e-12 ***
## SEMANA37              39.613      4.669   8.484 4.42e-15 ***
## SEMANA38              39.860      4.669   8.537 3.16e-15 ***
## SEMANA39              34.240      4.669   7.333 5.19e-12 ***
## SEMANA40              39.805      4.669   8.525 3.41e-15 ***
## SEMANA41              27.660      4.669   5.924 1.32e-08 ***
## SEMANA42              32.704      4.799   6.815 1.04e-10 ***
## SEMANA43              33.965      4.669   7.274 7.34e-12 ***
## SEMANA44              36.378      4.669   7.791 3.32e-13 ***
## SEMANA45              27.961      4.799   5.827 2.18e-08 ***
## SEMANA46              35.045      4.684   7.481 2.15e-12 ***
## SEMANA47              33.516      4.658   7.195 1.17e-11 ***
## SEMANA48              30.538      4.669   6.540 4.84e-10 ***
## SEMANA49              37.653      4.584   8.215 2.42e-14 ***
## SEMANA50              37.062      4.576   8.100 4.94e-14 ***
## SEMANA51              40.797      4.589   8.890 3.27e-16 ***
## SEMANA52              21.504      4.653   4.621 6.75e-06 ***
## SEMANA53             -23.471      3.837  -6.118 4.77e-09 ***
## Feria_Flores_Semana   12.990      4.994   2.601 0.009979 ** 
## Semana_Santa_Semana  -19.077      6.063  -3.146 0.001901 ** 
## Feriados_Lunes       -17.462      4.685  -3.727 0.000251 ***
## Feriados_Otros       -21.096      6.934  -3.042 0.002656 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 52.47 on 204 degrees of freedom
## Multiple R-squared:  0.7685, Adjusted R-squared:  0.7039 
## F-statistic: 11.88 on 57 and 204 DF,  p-value: < 2.2e-16

Se guardan el modelo en un objeto de r

saveRDS(caret_lm_fit_s_final,"../Modelos/Prediccion_Total_Semanal.rds")
Modelo_Total_semanal<-readRDS(file="../Modelos/Prediccion_Total_Semanal.rds")
Modelo de Random Forest para predición de Accidentes Graves

Se ajusta el modelo de Random Forest con todos los datos desde el 01-01-2014 al 31-12-2018

trcntrl = trainControl(method="cv", number=10)
caret_rf_fit_s_m_final = caret::train(ACCIDENTES_GRAVES∼Ano_Base+SEMANA+Feria_Flores_Semana+Semana_Santa_Semana+Feriados_Lunes+Feriados_Otros, data=Total_Dataset_Freq_S,
                      method = "rf", trControl = trcntrl,
                      prox=TRUE,allowParallel=TRUE)
summary(caret_rf_fit_s_m_final)
##                 Length Class      Mode     
## call                6  -none-     call     
## type                1  -none-     character
## predicted         262  -none-     numeric  
## mse               500  -none-     numeric  
## rsq               500  -none-     numeric  
## oob.times         262  -none-     numeric  
## importance         57  -none-     numeric  
## importanceSD        0  -none-     NULL     
## localImportance     0  -none-     NULL     
## proximity       68644  -none-     numeric  
## ntree               1  -none-     numeric  
## mtry                1  -none-     numeric  
## forest             11  -none-     list     
## coefs               0  -none-     NULL     
## y                 262  -none-     numeric  
## test                0  -none-     NULL     
## inbag               0  -none-     NULL     
## xNames             57  -none-     character
## problemType         1  -none-     character
## tuneValue           1  data.frame list     
## obsLevels           1  -none-     logical  
## param               2  -none-     list

Se guardan el modelo en un objeto de r

saveRDS(caret_rf_fit_s_m_final,"../Modelos/Prediccion_Grave_Semanal.rds")
Modelo_Grave_semanal<-readRDS(file="../Modelos/Prediccion_Grave_Semanal.rds")
Modelo de regresión lineal para predición de Accidentes Leves

Se ajusta el modelo con todos los datos desde el 01-01-2014 al 31-12-2018

trcntrl = trainControl(method="cv", number=10)
caret_lm_fit_s_sd_final = caret::train(ACCIDENTES_LEVES∼Ano_Base+SEMANA+Feria_Flores_Semana+Semana_Santa_Semana+Feriados_Lunes+Feriados_Otros, data=Total_Dataset_Freq_S,
                      method = "lm", trControl = trcntrl,
                      preProcess=c("center", "scale"),
                      tuneLength = 10)
summary(caret_lm_fit_s_sd_final)
## 
## Call:
## lm(formula = .outcome ~ ., data = dat)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -116.080  -15.805    2.998   17.233   92.920 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          358.676      1.860 192.862  < 2e-16 ***
## Ano_Base               6.327      1.864   3.394 0.000828 ***
## SEMANA02               7.029      2.714   2.590 0.010303 *  
## SEMANA03              14.377      2.669   5.386 1.98e-07 ***
## SEMANA04              15.775      2.679   5.889 1.58e-08 ***
## SEMANA05              17.036      2.679   6.360 1.30e-09 ***
## SEMANA06              17.639      2.679   6.585 3.77e-10 ***
## SEMANA07              22.903      2.679   8.551 2.90e-15 ***
## SEMANA08              18.845      2.679   7.036 2.95e-11 ***
## SEMANA09              18.763      2.679   7.005 3.52e-11 ***
## SEMANA10              20.024      2.679   7.476 2.22e-12 ***
## SEMANA11              19.257      2.679   7.189 1.21e-11 ***
## SEMANA12              18.640      2.719   6.856 8.25e-11 ***
## SEMANA13              17.872      2.704   6.610 3.29e-10 ***
## SEMANA14              21.710      2.710   8.011 8.60e-14 ***
## SEMANA15              21.464      2.710   7.920 1.51e-13 ***
## SEMANA16              17.077      2.710   6.301 1.79e-09 ***
## SEMANA17              22.775      2.633   8.651 1.53e-15 ***
## SEMANA18              21.640      2.625   8.244 2.01e-14 ***
## SEMANA19              21.943      2.669   8.221 2.33e-14 ***
## SEMANA20              21.943      2.669   8.221 2.33e-14 ***
## SEMANA21              20.298      2.669   7.605 1.03e-12 ***
## SEMANA22              17.091      2.672   6.396 1.07e-09 ***
## SEMANA23              18.982      2.687   7.064 2.50e-11 ***
## SEMANA24              18.681      2.672   6.991 3.81e-11 ***
## SEMANA25              16.460      2.672   6.160 3.82e-09 ***
## SEMANA26              13.664      2.672   5.113 7.27e-07 ***
## SEMANA27              15.418      2.753   5.601 6.85e-08 ***
## SEMANA28              18.489      2.679   6.903 6.33e-11 ***
## SEMANA29              21.758      2.672   8.143 3.79e-14 ***
## SEMANA30              20.381      2.669   7.635 8.54e-13 ***
## SEMANA31              23.027      2.908   7.919 1.52e-13 ***
## SEMANA32              21.415      3.082   6.949 4.86e-11 ***
## SEMANA33              21.285      2.669   7.974 1.08e-13 ***
## SEMANA34              17.803      2.714   6.559 4.36e-10 ***
## SEMANA35              18.133      2.679   6.770 1.35e-10 ***
## SEMANA36              19.832      2.679   7.404 3.41e-12 ***
## SEMANA37              21.560      2.679   8.049 6.79e-14 ***
## SEMANA38              22.656      2.679   8.458 5.20e-15 ***
## SEMANA39              18.983      2.679   7.087 2.19e-11 ***
## SEMANA40              23.259      2.679   8.684 1.24e-15 ***
## SEMANA41              18.078      2.679   6.749 1.51e-10 ***
## SEMANA42              18.818      2.753   6.836 9.26e-11 ***
## SEMANA43              20.326      2.679   7.588 1.13e-12 ***
## SEMANA44              21.806      2.679   8.141 3.83e-14 ***
## SEMANA45              17.776      2.753   6.457 7.66e-10 ***
## SEMANA46              23.917      2.687   8.900 3.05e-16 ***
## SEMANA47              20.984      2.672   7.853 2.28e-13 ***
## SEMANA48              19.531      2.679   7.292 6.63e-12 ***
## SEMANA49              25.424      2.629   9.669  < 2e-16 ***
## SEMANA50              24.694      2.625   9.408  < 2e-16 ***
## SEMANA51              25.873      2.633   9.828  < 2e-16 ***
## SEMANA52              12.327      2.669   4.618 6.85e-06 ***
## SEMANA53              -7.951      2.201  -3.613 0.000382 ***
## Feria_Flores_Semana    5.950      2.865   2.077 0.039095 *  
## Semana_Santa_Semana   -8.482      3.478  -2.439 0.015601 *  
## Feriados_Lunes        -9.792      2.688  -3.643 0.000342 ***
## Feriados_Otros       -11.805      3.978  -2.968 0.003359 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 30.1 on 204 degrees of freedom
## Multiple R-squared:  0.7417, Adjusted R-squared:  0.6695 
## F-statistic: 10.27 on 57 and 204 DF,  p-value: < 2.2e-16

Se guardan el modelo en un objeto de r

saveRDS(caret_lm_fit_s_sd_final,"../Modelos/Prediccion_leves_Semanal.rds")
Modelo_leves_semanal<-readRDS(file="../Modelos/Prediccion_leves_Semanal.rds")
Datos para pronóstico

Se oganizan los datos necesarios para el pronóstico de los accidentes en los años 2019, 2020 y 2021

Importación de los datos

load("../data/Dias_Especiales_Semanal.Rda")
datos_pronostico_semanal<-Dias_Especiales_Semanal[,c("ANO","Ano_Base","SEMANA","Feria_Flores_Semana","Semana_Santa_Semana","Feriados_Lunes","Feriados_Otros")]
datos_pronostico_semanal$ANO1 <- datos_pronostico_semanal$ANO
datos_pronostico_semanal$SEMANA1 <- datos_pronostico_semanal$SEMANA
library(dplyr)
datos_pronostico_semanal<-unite_(datos_pronostico_semanal, "Ano_Sem", c("ANO1","SEMANA1"))

Predicción del Total de accidentes con el modelo de regresión lineal

datos_pronostico_semanal$prediccion_Total_s<-predict(Modelo_Total_semanal,datos_pronostico_semanal[,c("Ano_Base","SEMANA","Feria_Flores_Semana","Semana_Santa_Semana","Feriados_Lunes","Feriados_Otros")])

Predicción de accidentes graves con el modelo de árbol de regresión

datos_pronostico_semanal$prediccion_Graves_s<-predict(Modelo_Grave_semanal,datos_pronostico_semanal[,c("Ano_Base","SEMANA","Feria_Flores_Semana","Semana_Santa_Semana","Feriados_Lunes","Feriados_Otros")])

Predicción de accidentes leves con el modelo de regresión lineal

datos_pronostico_semanal$prediccion_Leves_s<-predict(Modelo_leves_semanal,datos_pronostico_semanal[,c("Ano_Base","SEMANA","Feria_Flores_Semana","Semana_Santa_Semana","Feriados_Lunes","Feriados_Otros")])

Se guardan los datos de pronóstico en un objeto de r

save(datos_pronostico_semanal,file="../Modelos/datos_pronostico_semanal.Rda")